PIO assembler (#25)

* WIP pio assembler

* add comparison tests against the official assembler

* tokenizer passing tests

* add buildkite pipeline

* comparison tests

* assembler now outputting a few programs similar to official

* largely complete


Co-authored-by: Matt Knight <mattnite@proton.me>
Vesim 1 year ago committed by GitHub
parent 666f444037
commit b0e346608e
No known key found for this signature in database

@ -1,2 +1,2 @@
- command: zig build
- command: zig build test

.gitignore vendored

@ -1,2 +1,5 @@

@ -39,8 +39,19 @@ pub fn addPiPicoExecutable(
// project requires multiple HALs, it accepts microzig as a param
pub fn build(b: *Builder) !void {
const optimize = b.standardOptimizeOption(.{});
var examples = Examples.init(b, optimize);
//var examples = Examples.init(b, optimize);
const pio_tests = b.addTest(.{
.root_source_file = .{
.path = "src/hal/pio.zig",
.optimize = optimize,
const test_step = b.step("test", "run unit tests");
fn root() []const u8 {
@ -55,6 +66,7 @@ pub const Examples = struct {
pwm: *microzig.EmbeddedExecutable,
spi_master: *microzig.EmbeddedExecutable,
uart: *microzig.EmbeddedExecutable,
squarewave: *microzig.EmbeddedExecutable,
//uart_pins: microzig.EmbeddedExecutable,
flash_program: *microzig.EmbeddedExecutable,

deps/microzig vendored

@ -1 +1 @@
Subproject commit 23482a6986252e0eeff54a04abc0aac8a08d25d7
Subproject commit dabc9325cdee394ff66e28c91803cb814954b157

@ -0,0 +1,53 @@
//! Hello world for the PIO module: generating a square wave
const std = @import("std");
const microzig = @import("microzig");
const rp2040 = microzig.hal;
const gpio = rp2040.gpio;
const Pio = rp2040.pio.Pio;
const StateMachine = rp2040.pio.StateMachine;
const squarewave_program = (rp2040.pio.assemble(
\\.program squarewave
\\ set pindirs, 1 ; Set pin to output
\\ set pins, 1 [1] ; Drive pin high and then delay for one cycle
\\ set pins, 0 ; Drive pin low
\\ jmp again ; Set PC to label `again`
) catch
@panic("failed to assemble program"))
/// Entry point of the square wave example: copies the assembled program into
/// the instruction memory of PIO0, sets the clock divider of state machine 0,
/// routes GPIO 0 to PIO0 and finally enables the state machine.
pub fn main() void {
// Pick one PIO instance arbitrarily. We're also arbitrarily picking state
// machine 0 on this PIO instance (the state machines are numbered 0 to 3
// inclusive).
const pio: Pio = .pio0;
const sm: StateMachine = .sm0;
// Load the assembled program directly into the PIO's instruction memory.
// Each PIO instance has a 32-slot instruction memory, which all 4 state
// machines can see. The system has write-only access.
for (squarewave_program.instructions, 0..) |insn, i|
pio.get_instruction_memory()[i] = insn;
// Configure state machine 0 to run at sysclk/2.5. The state machines can
// run as fast as one instruction per clock cycle, but we can scale their
// speed down uniformly to meet some precise frequency target, e.g. for a
// UART baud rate. This register has 16 integer divisor bits and 8
// fractional divisor bits.
pio.set_clkdiv_int_frac(sm, 2, 0x80);
// There are five pin mapping groups (out, in, set, side-set, jmp pin)
// which are used by different instructions or in different circumstances.
// Here we're just using SET instructions. Configure state machine 0 SETs
// to affect GPIO 0 only; then configure GPIO0 to be controlled by PIO0,
// as opposed to e.g. the processors.
// NOTE(review): the comment above talks about the SET pin mapping, but the
// call below is `set_out_pins` — confirm which mapping is intended.
pio.set_out_pins(sm, 0, 1);
gpio.set_function(0, .pio0);
// Set the state machine running. The PIO CTRL register is global within a
// PIO instance, so you can start/stop multiple state machines
// simultaneously. We're using the register's hardware atomic set alias to
// make one bit high without doing a read-modify-write on the register.
pio.set_enabled(sm, true);

@ -1,5 +1,6 @@
const std = @import("std");
const microzig = @import("microzig");
const regs = microzig.chip.registers;
const SIO = microzig.chip.peripherals.SIO;
pub const adc = @import("hal/adc.zig");
pub const pins = @import("hal/pins.zig");
@ -14,6 +15,7 @@ pub const resets = @import("hal/resets.zig");
pub const irq = @import("hal/irq.zig");
pub const rom = @import("hal/rom.zig");
pub const flash = @import("hal/flash.zig");
pub const pio = @import("hal/pio.zig");
pub const clock_config = clocks.GlobalConfiguration.init(.{
.ref = .{ .source = .src_xosc },
@ -32,5 +34,5 @@ pub fn init() void {
pub fn get_cpu_id() u32 {
return regs.SIO.CPUID.*;
return SIO.CPUID.*;

@ -87,7 +87,7 @@ pub const PullUpDown = enum {
pub inline fn set_pull(comptime gpio: u32, mode: ?PullUpDown) void {
const gpio_name = comptime std.fmt.comptimePrint("GPIO{d}", .{gpio});
const gpio_regs = @field(PADS_BANK0, gpio_name);
const gpio_regs = &@field(PADS_BANK0, gpio_name);
if (mode == null) {
gpio_regs.modify(.{ .PUE = 0, .PDE = 0 });

@ -0,0 +1,11 @@
pub const Lock = struct {
impl: u32,
pub fn claim() Lock {
pub fn unlock(lock: Lock) void {

@ -1,5 +1,5 @@
const microzig = @import("microzig");
const regs = microzig.chip.registers;
const NVIC = microzig.chip.peripherals.NVIC;
// TODO: the register definitions are improved now, use them instead of raw
// writes/reads
@ -10,11 +10,11 @@ fn get_interrupt_mask(comptime interrupt_name: []const u8) u32 {
pub fn enable(comptime interrupt_name: []const u8) void {
const mask = comptime get_interrupt_mask(interrupt_name);
regs.SCS.NVIC.ICPR.raw = mask;
regs.SCS.NVIC.ISER.raw = mask;
NVIC.ICPR.raw = mask;
NVIC.ISER.raw = mask;
pub fn disable(comptime interrupt_name: []const u8) void {
const mask = comptime get_interrupt_mask(interrupt_name);
regs.SCS.NVIC.ICER.raw = mask;
NVIC.ICER.raw = mask;

@ -0,0 +1,358 @@
//! A PIO instance can load a single `Bytecode`; it has to be loaded into memory
const std = @import("std");
const microzig = @import("microzig");
const PIO = microzig.chip.types.peripherals.PIO0;
const PIO0 = microzig.chip.peripherals.PIO0;
const PIO1 = microzig.chip.peripherals.PIO1;
const gpio = @import("gpio.zig");
const assembler = @import("pio/assembler.zig");
pub const Bytecode = Bytecode;
pub const Program = assembler.Program;
pub const assemble = assembler.assemble;
var used_instruction_space: [2]u32 = [_]u32{ 0, 0 };
var claimed_state_machines: [2]u4 = [_]u4{ 0, 0 };
pub const Join = enum {
pub const Status = enum {
pub const Configuration = struct {
pin: u32,
pub const StateMachine = enum(u2) {
pub const Pio = enum(u1) {
pio0 = 0,
pio1 = 1,
/// Returns the register block that belongs to this instance: `PIO0` for
/// `.pio0` and `PIO1` for `.pio1`.
fn get_regs(self: Pio) *volatile PIO {
return switch (self) {
.pio0 => PIO0,
.pio1 => PIO1,
/// Returns the instruction memory of this instance, viewed as an array of 32
/// `u32` words that starts at the address of the `INSTR_MEM0` register.
pub fn get_instruction_memory(self: Pio) *volatile [32]u32 {
const regs = self.get_regs();
// Reinterpret the first instruction register as the start of the array.
return @ptrCast(*volatile [32]u32, &regs.INSTR_MEM0);
/// Calls `gpio.set_function` for `pin`, selecting the function that matches
/// this instance (`.pio0` or `.pio1`).
pub fn gpio_init(self: Pio, comptime pin: u5) void {
gpio.set_function(pin, switch (self) {
.pio0 => .pio0,
.pio1 => .pio1,
pub fn load(self: Pio, bytecode: Bytecode) !void {
_ = self;
_ = bytecode;
/// Reports whether `program` can be placed in this instance's instruction
/// memory starting at slot `offset`.
///
/// Returns `false` when the program has a fixed `origin` different from
/// `offset`, when it would run past the 32 instruction slots, or when any of
/// the slots it needs is already marked in `used_instruction_space`.
fn can_add_program_at_offset(self: Pio, program: Program, offset: u5) bool {
    // A program with a fixed origin may only be placed at that origin.
    if (program.origin) |origin|
        if (origin != offset)
            return false;

    // `<<` silently drops bits shifted past bit 31, so reject programs that
    // do not fit before building the shifted mask.
    if (@as(usize, offset) + program.instructions.len > 32)
        return false;

    const used_mask = used_instruction_space[@enumToInt(self)];
    // `get_mask` is anchored at slot 0; move it to the slots the program
    // would actually occupy, matching how the used mask is updated on load.
    const program_mask = program.get_mask() << offset;

    // We can add the program if the masks don't overlap, if there is
    // overlap the result of a bitwise AND will have a non-zero result
    return (used_mask & program_mask) == 0;
}
/// Finds a slot offset at which `program` can be loaded.
///
/// A program with a fixed `origin` is only tried at that origin. Otherwise the
/// lowest offset accepted by `can_add_program_at_offset` is returned.
/// Returns `error.NoSpace` when no placement is possible.
fn find_offset_for_program(self: Pio, program: Program) !u5 {
    if (program.origin) |origin|
        return if (self.can_add_program_at_offset(program, origin))
            origin
        else
            error.NoSpace;

    const len = program.instructions.len;
    if (len > 32)
        return error.NoSpace;

    // Offsets 0 through `32 - len` (inclusive) leave room for the program.
    // The bound is capped at 32 so that the index always fits a `u5`, which
    // only matters for an empty program.
    for (0..@min(32 - len + 1, 32)) |i| {
        const offset = @intCast(u5, i);
        if (self.can_add_program_at_offset(program, offset))
            return offset;
    }

    return error.NoSpace;
}
/// Copies the instructions of `program` into this instance's instruction
/// memory starting at slot `offset`, then records the occupied slots in
/// `used_instruction_space`.
///
/// Returns `error.NoSpace` when `can_add_program_at_offset` rejects the
/// placement.
/// NOTE(review): the `_unlocked` suffix suggests the caller is expected to
/// hold a lock; no locking is visible here — confirm.
fn add_program_at_offset_unlocked(self: Pio, program: Program, offset: u5) !void {
if (!self.can_add_program_at_offset(program, offset))
return error.NoSpace;
const instruction_memory = self.get_instruction_memory();
// The second iterable yields the destination slot index, starting at `offset`.
for (program.instructions, offset..) |insn, i|
instruction_memory[i] = insn;
const program_mask = program.get_mask();
// Shift the mask to the slots that were just written.
used_instruction_space[@enumToInt(self)] |= program_mask << offset;
/// Loads `program` into this instance's instruction memory at the first
/// offset where it fits (or at its fixed origin, if it has one).
///
/// Public functions will need to lock independently, so only exposing this function for now
///
/// Returns `error.NoSpace` when the program cannot be placed.
pub fn add_program(self: Pio, program: Program) !void {
    // TODO: const lock = hw.Lock.claim()
    // defer lock.unlock();

    const offset = try self.find_offset_for_program(program);
    try self.add_program_at_offset_unlocked(program, offset);
}
/// Claims the lowest-numbered state machine of this instance whose bit is not
/// yet set in `claimed_state_machines`, sets that bit and returns the state
/// machine. Returns `error.NoSpace` when all four bits are already set.
pub fn claim_unused_state_machine(self: Pio) !StateMachine {
// TODO: const lock = hw.Lock.claim()
// defer lock.unlock();
const claimed_mask = claimed_state_machines[@enumToInt(self)];
return for (0..4) |i| {
// One bit per state machine: bit `i` stands for state machine `i`.
const sm_mask = (@as(u4, 1) << @intCast(u2, i));
if (0 == (claimed_mask & sm_mask)) {
claimed_state_machines[@enumToInt(self)] |= sm_mask;
break @intToEnum(StateMachine, i);
} else error.NoSpace;
pub const StateMachineRegs = extern struct {
clkdiv: @TypeOf(PIO0.SM0_CLKDIV),
execctrl: @TypeOf(PIO0.SM0_EXECCTRL),
shiftctrl: @TypeOf(PIO0.SM0_SHIFTCTRL),
addr: @TypeOf(PIO0.SM0_ADDR),
instr: @TypeOf(PIO0.SM0_INSTR),
pinctrl: @TypeOf(PIO0.SM0_PINCTRL),
/// Returns a pointer to the register group of state machine `sm`, obtained by
/// casting the address of its `SMn_CLKDIV` register to `StateMachineRegs`.
/// NOTE(review): this assumes the per-state-machine registers follow
/// `SMn_CLKDIV` in the same order as the fields of `StateMachineRegs` —
/// confirm against the register definitions.
fn get_sm_regs(self: Pio, sm: StateMachine) *volatile StateMachineRegs {
const pio_regs = self.get_regs();
return switch (sm) {
.sm0 => @ptrCast(*volatile StateMachineRegs, &pio_regs.SM0_CLKDIV),
.sm1 => @ptrCast(*volatile StateMachineRegs, &pio_regs.SM1_CLKDIV),
.sm2 => @ptrCast(*volatile StateMachineRegs, &pio_regs.SM2_CLKDIV),
.sm3 => @ptrCast(*volatile StateMachineRegs, &pio_regs.SM3_CLKDIV),
pub fn join_fifo(self: Pio, sm: StateMachine, join: Join) void {
const tx: u1 = switch (join) {
.tx => 1,
.rx => 0,
.none => 0,
const rx: u1 = switch (join) {
.tx => 0,
.rx => 1,
.none => 0,
const sm_regs = self.get_sm_regs(sm);
.FJOIN_TX = tx,
.FJOIN_RX = rx,
pub fn set_clkdiv_int_frac(self: Pio, sm: StateMachine, div_int: u16, div_frac: u8) void {
if (div_int == 0 and div_frac != 0)
@panic("invalid params");
const sm_regs = self.get_sm_regs(sm);
.INT = div_int,
.FRAC = div_frac,
.reserved8 = 0,
pub fn set_out_shift(self: Pio, sm: StateMachine, args: struct {
shift_right: bool,
autopull: bool,
pull_threshold: u5,
}) void {
const sm_regs = self.get_sm_regs(sm);
.OUT_SHIFTDIR = @boolToInt(args.shift_right),
.AUTOPULL = @boolToInt(args.autopull),
.PULL_THRESH = args.pull_threshold,
pub fn set_out_pins(self: Pio, sm: StateMachine, base: u5, count: u5) void {
const sm_regs = self.get_sm_regs(sm);
.OUT_BASE = base,
.OUT_COUNT = count,
pub fn set_sideset_pins(self: Pio, sm: StateMachine, base: u5) void {
const sm_regs = self.get_sm_regs(sm);
sm_regs.pinctrl.modify(.{ .SIDESET_BASE = base });
/// Returns `true` when the bit for `sm` is set in the `TXFULL` field of the
/// `FSTAT` register.
pub fn is_tx_fifo_full(self: Pio, sm: StateMachine) bool {
const regs = self.get_regs();
const txfull = regs.FSTAT.read().TXFULL;
// `TXFULL` is tested with one bit per state machine, indexed by its number.
return (txfull & (@as(u4, 1) << @enumToInt(sm))) != 0;
/// Returns a pointer to the TX FIFO register (`TXF0`..`TXF3`) that belongs to
/// state machine `sm`.
pub fn get_tx_fifo(self: Pio, sm: StateMachine) *volatile u32 {
const regs = self.get_regs();
return switch (sm) {
.sm0 => &regs.TXF0,
.sm1 => &regs.TXF1,
.sm2 => &regs.TXF2,
.sm3 => &regs.TXF3,
/// Busy-waits while `is_tx_fifo_full` reports the TX FIFO of `sm` as full,
/// then writes `value` to that FIFO's register.
pub fn blocking_write(self: Pio, sm: StateMachine, value: u32) void {
// Spin until there is room; there is no timeout.
while (self.is_tx_fifo_full(sm)) {}
const fifo_ptr = self.get_tx_fifo(sm);
fifo_ptr.* = value;
pub fn encode_jmp() void {}
//static inline uint _pio_encode_instr_and_args(enum pio_instr_bits instr_bits, uint arg1, uint arg2) {
// valid_params_if(PIO_INSTRUCTIONS, arg1 <= 0x7);
// uint32_t major = _pio_major_instr_bits(instr_bits);
// if (major == pio_instr_bits_in || major == pio_instr_bits_out) {
// assert(arg2 && arg2 <= 32);
// } else {
// assert(arg2 <= 31);
// }
// return instr_bits | (arg1 << 5u) | (arg2 & 0x1fu);
//static inline uint pio_encode_jmp(uint addr) {
// return _pio_encode_instr_and_args(pio_instr_bits_jmp, 0, addr);
/// Starts (`enabled == true`) or stops (`enabled == false`) state machine
/// `sm` by setting or clearing its bit in the `SM_ENABLE` field of the `CTRL`
/// register. The other state machines' bits are left as they were.
pub fn set_enabled(self: Pio, sm: StateMachine, enabled: bool) void {
    const regs = self.get_regs();
    const sm_bit = @as(u4, 1) << @enumToInt(sm);

    var value = regs.CTRL.read();
    if (enabled)
        value.SM_ENABLE |= sm_bit
    else
        value.SM_ENABLE &= ~sm_bit;

    // The modified copy has to be stored back, otherwise nothing changes.
    regs.CTRL.write(value);
}
pub fn sm_init(self: Pio, sm: StateMachine, initial_pc: u5, config: StateMachineRegs) void {
// Halt the machine, set some sensible defaults
self.set_enabled(sm, false);
self.set_config(sm, config);
// Clear FIFO debug flags
//const uint32_t fdebug_sm_mask =
//pio->fdebug = fdebug_sm_mask << sm;
// Finally, clear some internal SM state
self.exec(sm, encode_jmp(initial_pc));
// state machine configuration helpers:
// - set_out_pins
// - set_set_pins
// - set_in_pins
// - set_sideset_pins
// - set_sideset
// - calculate_clkdiv_from_float
// - set_clkdiv
// - set_wrap
// - set_jmp_pin
// - set_in_shift
// - set_out_shift
// - set_fifo_join
// - set_out_special
// - set_mov_status
// PIO:
// - can_add_program
// - add_program_at_offset
// - add_program
// - remove_program
// - clear_instruction_memory
// - sm_init
// - sm_set_enabled
// - sm_mask_enabled
// - sm_restart
// - restart_sm_mask
// - sm_clkdiv_restart
// - clkdiv_restart_sm_mask
// - enable_sm_mask_in_sync
// - set_irq0_source_enabled
// - set_irq1_source_enabled
// - set_irq0_source_mask_enabled
// - set_irq1_source_mask_enabled
// - set_irqn_source_enabled
// - set_irqn_source_mask_enabled
// - interrupt_get
// - interrupt_clear
// - sm_get_pc
// - sm_exec
// - sm_is_exec_stalled
// - sm_exec_wait_blocking
// - sm_set_wrap
// - sm_set_out_pins
// - sm_set_set_pins
// - sm_set_in_pins
// - sm_set_sideset_pins
// - sm_put
// - sm_get
// - sm_is_rx_fifo_full
// - sm_is_rx_fifo_empty
// - sm_is_rx_fifo_level
// - sm_is_tx_fifo_full
// - sm_is_tx_fifo_empty
// - sm_is_tx_fifo_level
// - sm_put_blocking
// - sm_get_blocking
// - sm_drain_tx_fifo
// - sm_set_clkdiv_int_frac
// - sm_set_clkdiv
// - sm_clear_fifos
// - sm_set_pins
// - sm_set_pins_with_mask
// - sm_set_pindirs_with_mask
// - sm_set_consecutive_pindirs
// - sm_claim
// - claim_sm_mask
// - sm_unclaim
// - claim_unused_sm
// - sm_is_claimed
test "pio" {

@ -0,0 +1,140 @@
const std = @import("std");
const assert = std.debug.assert;
const tokenizer = @import("assembler/tokenizer.zig");
const encoder = @import("assembler/encoder.zig");
pub const TokenizeOptions = tokenizer.Options;
pub const EncodeOptions = encoder.Options;
pub const Define = struct {
name: []const u8,
value: i64,
pub const Program = struct {
name: []const u8,
defines: []const Define,
instructions: []const u16,
origin: ?u5,
side_set: ?encoder.SideSet,
wrap_target: ?u5,
wrap: ?u5,
/// Returns a bit mask with one bit set per instruction of the program,
/// anchored at bit 0 (a program of `n` instructions yields the low `n` bits).
/// Programs of 32 or more instructions yield a mask with all 32 bits set.
pub fn get_mask(program: Program) u32 {
    // `std.math.shl` returns 0 once the shift amount reaches the bit width,
    // so the wrapping subtraction turns that case into an all-ones mask
    // instead of overflowing.
    return std.math.shl(u32, 1, program.instructions.len) -% 1;
}
pub const Output = struct {
defines: []const Define,
programs: []const Program,
/// Returns the program called `name` from the assembler output.
/// Panics with "program '<name>' not found" when no program has that name.
pub fn get_program_by_name(
    comptime output: Output,
    comptime name: []const u8,
) Program {
    return for (output.programs) |program| {
        // Compare against the requested name, not against the program itself.
        if (std.mem.eql(u8, program.name, name))
            break program;
    } else @panic(std.fmt.comptimePrint("program '{s}' not found", .{name}));
}
/// Returns the value of the global define called `name` as a `u32`.
/// Panics when no define has that name, or when its value cannot be
/// represented as a `u32`.
pub fn get_define_by_name(
    comptime output: Output,
    comptime name: []const u8,
) u32 {
    return for (output.defines) |define| {
        // Compare against the requested name, not against the define itself.
        if (std.mem.eql(u8, define.name, name))
            // Defines are stored as `i64`; only hand out values that fit the
            // declared return type.
            break std.math.cast(u32, define.value) orelse
                @panic(std.fmt.comptimePrint("define '{s}' does not fit in a u32", .{name}));
    } else @panic(std.fmt.comptimePrint("define '{s}' not found", .{name}));
}
pub const AssembleOptions = struct {
tokenize: TokenizeOptions = .{},
encode: EncodeOptions = .{},
/// Error detail reported alongside an assembler error: a formatted message of
/// at most 256 bytes and the source index it refers to.
pub const Diagnostics = struct {
    message: std.BoundedArray(u8, 256),
    index: u32,

    /// Builds a diagnostic for source position `index` with a message
    /// formatted from `fmt` and `args`. Output that does not fit into the
    /// 256-byte message buffer is dropped rather than treated as unreachable.
    pub fn init(index: u32, comptime fmt: []const u8, args: anytype) Diagnostics {
        var ret = Diagnostics{
            .message = std.BoundedArray(u8, 256).init(0) catch unreachable,
            .index = index,
        };

        // The only way this can fail is by running out of buffer space. A
        // shortened message is preferable to undefined behaviour while
        // reporting an error, so the overflow is deliberately ignored.
        ret.message.writer().print(fmt, args) catch {};
        return ret;
    }
};
pub fn assemble_impl(comptime source: []const u8, diags: *?Diagnostics, options: AssembleOptions) !Output {
const tokens = try tokenizer.tokenize(source, diags, options.tokenize);
const encoder_output = try encoder.encode(tokens.slice(), diags, options.encode);
var programs = std.BoundedArray(Program, options.encode.max_programs).init(0) catch unreachable;
for (encoder_output.programs.slice()) |bounded|
try programs.append(bounded.to_exported_program());
return Output{
.defines = blk: {
var tmp = std.BoundedArray(Define, options.encode.max_defines).init(0) catch unreachable;
for (encoder_output.global_defines.slice()) |define|
.name = define.name,
.value = define.value,
}) catch unreachable;
break :blk tmp.slice();
.programs = programs.slice(),
fn format_compile_error(comptime message: []const u8, comptime source: []const u8, comptime index: u32) []const u8 {
var line_str: []const u8 = "";
var line_num: u32 = 1;
var column: u32 = 0;
var line_it = std.mem.tokenize(u8, source, "\n\r");
while (line_it.next()) |line| : (line_num += 1) {
line_str = line_str ++ "\n" ++ line;
if (line_it.index > index) {
column = line.len - (line_it.index - index);
line_str = line;
return std.fmt.comptimePrint(
\\failed to assemble PIO code:
, .{
[_]u8{' '} ** column,
[_]u8{' '} ** column,
pub fn assemble(comptime source: []const u8, comptime options: AssembleOptions) !Output {
var diags: ?Diagnostics = null;
return assemble_impl(source, &diags, options) catch |err| if (diags) |d|
@compileError(format_compile_error(d.message.slice(), source, d.index))
test "tokenizer and encoder" {
test "comparison" {

@ -0,0 +1,706 @@
//! Expressions for PIO are weird. The documentation states that an expression,
//! when used as a "value", requires parenthesis. However the official PIO
//! assembler allows for defines with a value of `::1` which is an expression.
//! Annoyingly, looking at the parser, it seems that it supports a number of
//! other operations not covered in the documentation.
ops: BoundedOperations,
values: BoundedValues,
const std = @import("std");
const assert = std.debug.assert;
const assembler = @import("../assembler.zig");
const Diagnostics = assembler.Diagnostics;
const encoder = @import("encoder.zig");
const DefineWithIndex = encoder.DefineWithIndex;
const Expression = @This();
const BoundedOperations = std.BoundedArray(OperationWithIndex, 32);
const BoundedValues = std.BoundedArray(Value, 32);
const Value = struct {
str: []const u8,
index: u32,
const OperationWithIndex = struct {
op: Operation,
index: u32,
const call_depth_max = 64;
pub const Operation = enum {
// operations shown in pioasm's parser:
// - OR
// - AND
// - XOR
pub fn format(
op: Operation,
comptime fmt: []const u8,
options: std.fmt.FormatOptions,
writer: anytype,
) !void {
_ = fmt;
_ = options;
try writer.print("{s}", .{switch (op) {
.add => "add",
.sub => "sub",
.mul => "mul",
.div => "div",
.negative => "neg",
.bit_reverse => "rev",
.value => "val",
pub fn tokenize(
str: []const u8,
/// starting index of the expression
index: u32,
diags: *?Diagnostics,
) !Expression {
var ops = BoundedOperations.init(0) catch unreachable;
var values = BoundedValues.init(0) catch unreachable;
const call_depth: u32 = 0;
try recursive_tokenize(call_depth, &ops, &values, str, index, diags);
return Expression{
.ops = ops,
.values = values,
/// Result of `trim_outer_parenthesis`: the (possibly shortened) string and the
/// index of its first byte within the string that was passed in.
const TrimResult = struct {
    str: []const u8,
    index: u32,

    /// The "nothing was trimmed" result: `str` unchanged, starting at index 0.
    fn default(str: []const u8) TrimResult {
        return TrimResult{
            .str = str,
            .index = 0,
        };
    }
};

/// Strips one pair of parentheses that encloses the whole expression.
///
/// If the first and last non-whitespace characters (spaces and tabs are
/// skipped) are `(` and `)` AND that `(` is closed by that very `)`, the text
/// between them is returned together with its start index. In every other
/// case the input is returned unchanged with index 0. In particular
/// `(1) + (2)` is left alone, because its first `(` is closed before the end.
fn trim_outer_parenthesis(str: []const u8) TrimResult {
    // Find the first non-whitespace character; it has to be '('.
    var open: u32 = 0;
    while (open < str.len) : (open += 1) {
        switch (str[open]) {
            ' ', '\t' => {},
            '(' => break,
            else => return TrimResult.default(str),
        }
    } else return TrimResult.default(str);

    // Find the last non-whitespace character to the right of `open`; it has
    // to be ')'. `str` is not empty here because a '(' was found.
    var close: usize = str.len - 1;
    while (close > open) : (close -= 1) {
        switch (str[close]) {
            ' ', '\t' => {},
            ')' => break,
            else => return TrimResult.default(str),
        }
    } else return TrimResult.default(str);

    // Only trim when the opening parenthesis is still open when `close` is
    // reached. `depth` is at least 1 after the first iteration and the
    // function returns as soon as it drops to 0, so it never underflows.
    var depth: u32 = 0;
    var i: usize = open;
    while (i < close) : (i += 1) {
        switch (str[i]) {
            '(' => depth += 1,
            ')' => {
                depth -= 1;
                if (depth == 0)
                    return TrimResult.default(str);
            },
            else => {},
        }
    }

    return TrimResult{
        .str = str[open + 1 .. close],
        .index = open + 1,
    };
}
fn recursive_tokenize(
call_depth: u32,
ops: *BoundedOperations,
values: *BoundedValues,
str: []const u8,
index: u32,
diags: *?Diagnostics,
) !void {
assert(call_depth < call_depth_max);
const trim_result = trim_outer_parenthesis(str);
const expr_str = trim_result.str;
const expr_index = index + trim_result.index;
var parenthesis_found = false;
var depth: u32 = 0;
var i = @intCast(i32, expr_str.len - 1);
outer: while (i >= 0) : (i -= 1) {
const idx = @intCast(u32, i);
// TODO: how about if the expression is fully enveloped in parenthesis?
switch (expr_str[idx]) {
')' => {
depth += 1;
parenthesis_found = true;
continue :outer;
'(' => {
if (depth == 0) {
diags.* = Diagnostics.init(expr_index + idx, "mismatched parenthesis", .{});
return error.MismatchedParenthesis;
depth -= 1;
parenthesis_found = true;
if (depth != 0)
continue :outer;
else => if (depth > 0)
continue :outer,
const op: Operation = switch (expr_str[idx]) {
'+' => .add,
// needs context to determine if it's a negative or subtraction
'-' => blk: {
// it's negative if we have nothing to the left. If an operator
// is found to the left we continue
const is_negative = (i == 0) or is_negative: {
var j = i - 1;
while (j >= 0) : (j -= 1) {
const jdx = @intCast(u32, j);
switch (expr_str[jdx]) {
' ', '\t' => continue,
'+', '-', '*', '/' => continue :outer,
else => break :is_negative false,
break :is_negative true;
if (is_negative) {
try ops.append(.{
.op = .negative,
.index = expr_index + idx,
try recursive_tokenize(call_depth + 1, ops, values, expr_str[idx + 1 ..], expr_index + idx + 1, diags);
break :blk .sub;
'*' => .mul,
'/' => .div,
':' => {
const is_bit_reverse = (i != 0) and expr_str[idx - 1] == ':';
if (is_bit_reverse) {
try ops.append(.{
.op = .bit_reverse,
.index = expr_index + idx - 1,
try recursive_tokenize(call_depth + 1, ops, values, expr_str[idx + 1 ..], expr_index + idx + 1, diags);
i -= 1;
return error.InvalidBitReverse;
else => continue,
try ops.append(.{
.op = op,
.index = expr_index + idx,
try recursive_tokenize(call_depth + 1, ops, values, expr_str[idx + 1 ..], expr_index + idx + 1, diags);
try recursive_tokenize(call_depth + 1, ops, values, expr_str[0..idx], expr_index, diags);
} else if (parenthesis_found) {
try recursive_tokenize(call_depth + 1, ops, values, expr_str, expr_index, diags);
} else {
// if we hit this path, then the full string has been scanned and no operators were found
const trimmed = std.mem.trim(u8, expr_str, " \t");
const value_index = expr_index + @intCast(u32, std.mem.indexOf(u8, expr_str, trimmed).?);
try ops.append(.{
.op = .value,
.index = value_index,
try values.append(.{
.str = trimmed,
.index = value_index,
if (depth != 0) {
diags.* = Diagnostics.init(expr_index + @intCast(u32, i), "mismatched parenthesis", .{});
return error.MismatchedParenthesis;
const EvaluatedValue = struct {
num: i128,
index: u32,
pub fn format(
eval_value: EvaluatedValue,
comptime fmt: []const u8,
options: std.fmt.FormatOptions,
writer: anytype,
) !void {
_ = fmt;
_ = options;
try writer.print("{}", .{eval_value.num});
pub fn evaluate(
self: Expression,
define_lists: []const []const DefineWithIndex,
diags: *?Diagnostics,
) !i128 {
var values = std.BoundedArray(EvaluatedValue, 32).init(0) catch unreachable;
// parse/extract values into numbers
for (self.values.slice()) |entry| {
const value: EvaluatedValue = if (std.fmt.parseInt(i128, entry.str, 0)) |num| .{
.num = num,
.index = entry.index,
} else |_| blk: {
// if it fails, try looking up the strings in definitions
for (define_lists) |define_list|
for (define_list) |define|
if (std.mem.eql(u8, define.name, entry.str))
break :blk .{
.num = define.value,
.index = define.index,
diags.* = Diagnostics.init(entry.index, "value doesn't parse as an integer, or define not found", .{});
return error.UnresolvedValue;
try values.append(value);
return if (self.ops.len == 1) blk: {
assert(self.values.len == 1);
assert(self.ops.get(0).op == .value);
break :blk values.get(0).num;
} else blk: {
const result = try recursive_evaluate(0, self.ops.slice(), values.slice(), diags);
assert(result.consumed.ops == self.ops.len);
assert(result.consumed.values == self.values.len);
break :blk result.value;
const RecursiveEvalResult = struct {
value: i128,
consumed: struct {
ops: u32,
values: u32,
index: u32,
/// Evaluates the operation at the front of `owis` and reports how many
/// operations and values it consumed, so the caller can continue behind it.
///
/// `owis` and `values` are in the order produced by the tokenizer: an operator
/// comes first, followed by its right-hand operand and then its left-hand
/// operand. Unary operators are followed by their single operand.
///
/// Errors:
/// - `error.EvaluatedValueDoesntFit`: the operand of a bit reverse is outside
///   of what 32 bits can hold (below `minInt(i32)` or above `maxInt(u32)`).
/// - `error.DivideByZero`: the denominator of a division evaluates to zero.
/// In both cases `diags` is filled in with the position of the operator.
fn recursive_evaluate(
    call_depth: u32,
    owis: []const OperationWithIndex,
    values: []const EvaluatedValue,
    diags: *?Diagnostics,
) !RecursiveEvalResult {
    assert(call_depth < call_depth_max);
    assert(owis.len != 0);
    assert(values.len != 0);

    return switch (owis[0].op) {
        .value => .{
            .value = values[0].num,
            .index = values[0].index,
            .consumed = .{
                .ops = 1,
                .values = 1,
            },
        },
        .negative => blk: {
            // The operand may itself be an expression (e.g. `-(1 + 2)`), so it
            // is evaluated recursively instead of assuming a single value.
            const operand = try recursive_evaluate(call_depth + 1, owis[1..], values, diags);
            break :blk .{
                .value = -operand.value,
                .index = operand.index,
                .consumed = .{
                    .ops = 1 + operand.consumed.ops,
                    .values = operand.consumed.values,
                },
            };
        },
        .bit_reverse => blk: {
            const operand = try recursive_evaluate(call_depth + 1, owis[1..], values, diags);

            // `maxInt(u32)` itself still fits in 32 bits, hence `>`.
            if (operand.value > std.math.maxInt(u32) or
                operand.value < std.math.minInt(i32))
            {
                diags.* = Diagnostics.init(owis[0].index, "Evaluated value does not fit in 32-bits: 0x{x}", .{operand.value});
                return error.EvaluatedValueDoesntFit;
            }

            break :blk .{
                // Reversing all 128 bits moves the low 32 bits to the top;
                // shifting them back down leaves the reversed 32-bit value.
                .value = @bitCast(i128, @bitReverse(@bitCast(u128, operand.value)) >> (128 - 32)),
                .index = operand.index,
                .consumed = .{
                    .ops = 1 + operand.consumed.ops,
                    .values = operand.consumed.values,
                },
            };
        },
        .add, .sub, .mul, .div => blk: {
            // The right-hand side was tokenized first, so it comes first.
            const rhs = try recursive_evaluate(call_depth + 1, owis[1..], values, diags);
            const lhs = try recursive_evaluate(call_depth + 1, owis[1 + rhs.consumed.ops ..], values[rhs.consumed.values..], diags);
            break :blk .{
                .consumed = .{
                    .ops = 1 + lhs.consumed.ops + rhs.consumed.ops,
                    .values = lhs.consumed.values + rhs.consumed.values,
                },
                .index = lhs.index,
                .value = switch (owis[0].op) {
                    .add => lhs.value + rhs.value,
                    .sub => lhs.value - rhs.value,
                    .mul => lhs.value * rhs.value,
                    .div => div: {
                        if (rhs.value == 0) {
                            diags.* = Diagnostics.init(owis[0].index, "divide by zero (denominator evaluates to zero)", .{});
                            return error.DivideByZero;
                        }

                        // Truncating division gives the same result as
                        // `@divExact` whenever the division is exact, and does
                        // not trap when there is a remainder.
                        break :div @divTrunc(lhs.value, rhs.value);
                    },
                    else => unreachable,
                },
            };
        },
    };
}
const expect = std.testing.expect;
const expectEqual = std.testing.expectEqual;
const expectEqualStrings = std.testing.expectEqualStrings;
fn expect_equal_slices_of_values(
expected: []const Value,
actual: []const Value,
) !void {
for (expected, actual) |e, a| {
try expectEqualStrings(e.str, a.str);
try expectEqual(e.index, a.index);
fn expect_equal_slices_of_ops(
expected: []const OperationWithIndex,
actual: []const OperationWithIndex,
) !void {
for (expected, actual) |e, a| {
try expectEqual(e.op, a.op);
try expectEqual(e.index, a.index);
test "expr.tokenize.integer" {
var diags: ?Diagnostics = null;
const expr = try tokenize("1", 0, &diags);
try expect_equal_slices_of_ops(&.{
.{ .index = 0, .op = .value },
}, expr.ops.slice());
try expect_equal_slices_of_values(&.{
.{ .index = 0, .str = "1" },
}, expr.values.slice());
test "expr.tokenize.integer.parenthesis" {
var diags: ?Diagnostics = null;
const expr = try tokenize("(1)", 0, &diags);
try expect_equal_slices_of_ops(&.{
.{ .index = 1, .op = .value },
}, expr.ops.slice());
try expect_equal_slices_of_values(&.{
.{ .index = 1, .str = "1" },
}, expr.values.slice());
test "expr.tokenize.integer.double parenthesis" {
var diags: ?Diagnostics = null;
const expr = try tokenize("((1))", 0, &diags);
try expect_equal_slices_of_ops(&.{
.{ .index = 2, .op = .value },
}, expr.ops.slice());
try expect_equal_slices_of_values(&.{
.{ .index = 2, .str = "1" },
}, expr.values.slice());
test "expr.tokenize.symbol" {
var diags: ?Diagnostics = null;
const expr = try tokenize("BAR", 0, &diags);
try expect_equal_slices_of_ops(&.{
.{ .index = 0, .op = .value },
}, expr.ops.slice());
try expect_equal_slices_of_values(&.{
.{ .index = 0, .str = "BAR" },
}, expr.values.slice());
test "expr.tokenize.add" {
var diags: ?Diagnostics = null;
const expr = try tokenize("1 + 2", 0, &diags);
try expect_equal_slices_of_ops(&.{
.{ .index = 2, .op = .add },
.{ .index = 4, .op = .value },
.{ .index = 0, .op = .value },
}, expr.ops.slice());
try expect_equal_slices_of_values(&.{
.{ .index = 4, .str = "2" },
.{ .index = 0, .str = "1" },
}, expr.values.slice());
test "expr.tokenize.add.chain" {
var diags: ?Diagnostics = null;
const expr = try tokenize("1 + 2 + 3", 0, &diags);
try expect_equal_slices_of_ops(&.{
.{ .index = 6, .op = .add },
.{ .index = 8, .op = .value },
.{ .index = 2, .op = .add },
.{ .index = 4, .op = .value },
.{ .index = 0, .op = .value },
}, expr.ops.slice());
try expect_equal_slices_of_values(&.{
.{ .index = 8, .str = "3" },
.{ .index = 4, .str = "2" },
.{ .index = 0, .str = "1" },
}, expr.values.slice());
test "expr.tokenize.sub" {
var diags: ?Diagnostics = null;
const expr = try tokenize("1 - 2", 0, &diags);
try expect_equal_slices_of_ops(&.{
.{ .index = 2, .op = .sub },
.{ .index = 4, .op = .value },
.{ .index = 0, .op = .value },
}, expr.ops.slice());
try expect_equal_slices_of_values(&.{
.{ .index = 4, .str = "2" },
.{ .index = 0, .str = "1" },
}, expr.values.slice());
test "expr.tokenize.sub.nospace" {
var diags: ?Diagnostics = null;
const expr = try tokenize("1-2", 0, &diags);
try expect_equal_slices_of_ops(&.{
.{ .index = 1, .op = .sub },
.{ .index = 2, .op = .value },
.{ .index = 0, .op = .value },
}, expr.ops.slice());
try expect_equal_slices_of_values(&.{
.{ .index = 2, .str = "2" },
.{ .index = 0, .str = "1" },
}, expr.values.slice());
test "expr.tokenize.sub.negative" {
var diags: ?Diagnostics = null;
const expr = try tokenize("1 - -2", 0, &diags);
try expect_equal_slices_of_ops(&.{
.{ .index = 2, .op = .sub },
.{ .index = 4, .op = .negative },
.{ .index = 5, .op = .value },
.{ .index = 0, .op = .value },
}, expr.ops.slice());
try expect_equal_slices_of_values(&.{
.{ .index = 5, .str = "2" },
.{ .index = 0, .str = "1" },
}, expr.values.slice());
test "expr.tokenize.mul" {
var diags: ?Diagnostics = null;
const expr = try tokenize("1 * 2", 0, &diags);
try expect_equal_slices_of_ops(&.{
.{ .index = 2, .op = .mul },
.{ .index = 4, .op = .value },
.{ .index = 0, .op = .value },
}, expr.ops.slice());
try expect_equal_slices_of_values(&.{
.{ .index = 4, .str = "2" },
.{ .index = 0, .str = "1" },
}, expr.values.slice());
test "expr.tokenize.div" {
var diags: ?Diagnostics = null;
const expr = try tokenize("1 / 2", 0, &diags);
try expect_equal_slices_of_ops(&.{
.{ .index = 2, .op = .div },
.{ .index = 4, .op = .value },
.{ .index = 0, .op = .value },
}, expr.ops.slice());
try expect_equal_slices_of_values(&.{
.{ .index = 4, .str = "2" },
.{ .index = 0, .str = "1" },
}, expr.values.slice());
test "expr.tokenize.negative" {
var diags: ?Diagnostics = null;
const expr = try tokenize("-1", 0, &diags);
try expect_equal_slices_of_ops(&.{
.{ .index = 0, .op = .negative },
.{ .index = 1, .op = .value },
}, expr.ops.slice());
try expect_equal_slices_of_values(&.{
.{ .index = 1, .str = "1" },
}, expr.values.slice());
test "expr.tokenize.bit reverse" {
var diags: ?Diagnostics = null;
const expr = try tokenize("::1", 0, &diags);
try expect_equal_slices_of_ops(&.{
.{ .index = 0, .op = .bit_reverse },
.{ .index = 2, .op = .value },
}, expr.ops.slice());
try expect_equal_slices_of_values(&.{
.{ .index = 2, .str = "1" },
}, expr.values.slice());
// Checks the operation and value order produced for a nested expression that
// mixes binary operators, a bit reverse and parentheses.
test "expr.tokenize.parenthesis" {
    var diags: ?Diagnostics = null;
    const expr = try tokenize("1 * (::2 + (12 / 3)) - 5", 0, &diags);

    try expect_equal_slices_of_ops(&.{
        .{ .index = 21, .op = .sub },
        .{ .index = 23, .op = .value },
        .{ .index = 2, .op = .mul },
        .{ .index = 9, .op = .add },
        .{ .index = 15, .op = .div },
        .{ .index = 17, .op = .value },
        .{ .index = 12, .op = .value },
        .{ .index = 5, .op = .bit_reverse },
        .{ .index = 7, .op = .value },
        .{ .index = 0, .op = .value },
    }, expr.ops.slice());

    try expect_equal_slices_of_values(&.{
        .{ .index = 23, .str = "5" },
        .{ .index = 17, .str = "3" },
        .{ .index = 12, .str = "12" },
        .{ .index = 7, .str = "2" },
        .{ .index = 0, .str = "1" },
    }, expr.values.slice());
}
fn evaluate_test(expected: i128, str: []const u8, define_list: []const DefineWithIndex) !void {
var diags: ?Diagnostics = null;
const expr = tokenize(str, 0, &diags) catch |err| {
if (diags) |d|
std.log.err("{}: {s}", .{ err, d.message.slice() });
return err;
const actual = expr.evaluate(&.{define_list}, &diags) catch |err| {
if (diags) |d|
std.log.err("{}: {s}", .{ err, d.message.slice() })
std.log.err("{}", .{err});
return err;
try expectEqual(expected, actual);
test "expr.evaluate.integer" {
try evaluate_test(1, "1", &.{});
test "expr.evaluate.symbol" {
try evaluate_test(5, "BAR", &.{
.name = "BAR",
.value = 5,
.index = 0,
test "expr.evaluate.add" {
try evaluate_test(3, "1 + 2", &.{});
try evaluate_test(6, "1 + 2 + 3", &.{});
test "expr.evaluate.sub" {
try evaluate_test(1, "2 - 1", &.{});
try evaluate_test(1, "(NUM_CYCLES - 1)", &.{
.name = "NUM_CYCLES",
.value = 2,
.index = 1,
test "expr.evaluate.mul" {
try evaluate_test(9, "3 * 3", &.{});
test "expr.evaluate.div" {
try evaluate_test(3, "9 / 3", &.{});
try evaluate_test(3, "9 / 3", &.{});
test "expr.evaluate.negative" {
try evaluate_test(-3, "-3", &.{});
test "expr.evaluate.bit reverse" {
try evaluate_test(0x80000000, "::1", &.{});
test "expr.evaluate.parenthesis" {
try evaluate_test(15, "5 * (1 + 2)", &.{});
try evaluate_test(1 * (@bitReverse(@as(u32, 2)) + (12 / 3)) - 5, "1 * (::2 + (12 / 3)) - 5", &.{});

@ -0,0 +1,168 @@
const std = @import("std");
const assembler = @import("../assembler.zig");
const tokenizer = @import("tokenizer.zig");
const c = @cImport({
@cDefine("PICO_NO_HARDWARE", "1");
/// Assembles `source` at comptime and checks every resulting program against
/// the instruction array of the matching name in the C import `c`, i.e. the
/// symbol `<program name>_program_instructions`.
///
/// Fails if no program was produced or if any instruction differs.
fn pio_comparison(comptime source: []const u8) !void {
// Assembly runs at comptime; any assembler error reaches `unreachable`
// instead of being returned to the test.
const output = comptime assembler.assemble(source, .{}) catch unreachable;
// Guard against a source that yields no programs, which would make the
// loops below pass vacuously.
try std.testing.expect(output.programs.len > 0);
// `inline for`: the symbol name given to @field is built from
// `program.name`, so it has to be known at comptime.
inline for (output.programs) |program| {
const expected_insns = @field(c, program.name ++ "_program_instructions");
// First pass: log every expected/actual pair at debug level.
for (program.instructions, expected_insns) |actual, expected| {
std.log.debug("expected: 0x{x}", .{expected});
std.log.debug(" actual: 0x{x}", .{actual});
std.log.debug("", .{});
// Second pass: fail on the first instruction that does not match.
for (program.instructions, expected_insns) |actual, expected|
try std.testing.expectEqual(expected, actual);
test "pio.comparison.addition" {
try pio_comparison(@embedFile("comparison_tests/addition.pio"));
test "pio.comparison.apa102" {
try pio_comparison(@embedFile("comparison_tests/apa102.pio"));
test "pio.comparison.blink" {
try pio_comparison(@embedFile("comparison_tests/blink.pio"));
test "pio.comparison.clocked_input" {
try pio_comparison(@embedFile("comparison_tests/clocked_input.pio"));
test "pio.comparison.differential_manchester" {
try pio_comparison(@embedFile("comparison_tests/differential_manchester.pio"));
test "pio.comparison.hello" {
try pio_comparison(@embedFile("comparison_tests/hello.pio"));
test "pio.comparison.hub75" {
try pio_comparison(@embedFile("comparison_tests/hub75.pio"));
test "pio.comparison.i2c" {
try pio_comparison(@embedFile("comparison_tests/i2c.pio"));
test "pio.comparison.manchester_encoding" {
try pio_comparison(@embedFile("comparison_tests/manchester_encoding.pio"));
test "pio.comparison.nec_carrier_burst" {
try pio_comparison(@embedFile("comparison_tests/nec_carrier_burst.pio"));
test "pio.comparison.nec_carrier_control" {
try pio_comparison(@embedFile("comparison_tests/nec_carrier_control.pio"));
test "pio.comparison.nec_receive" {
try pio_comparison(@embedFile("comparison_tests/nec_receive.pio"));
test "pio.comparison.pio_serialiser" {
try pio_comparison(@embedFile("comparison_tests/pio_serialiser.pio"));
test "pio.comparison.pwm" {
try pio_comparison(@embedFile("comparison_tests/pwm.pio"));
test "pio.comparison.quadrature_encoder" {
try pio_comparison(@embedFile("comparison_tests/quadrature_encoder.pio"));
test "pio.comparison.resistor_dac" {
try pio_comparison(@embedFile("comparison_tests/resistor_dac.pio"));
test "pio.comparison.spi" {
try pio_comparison(@embedFile("comparison_tests/spi.pio"));
test "pio.comparison.squarewave" {
try pio_comparison(@embedFile("comparison_tests/squarewave.pio"));
test "pio.comparison.squarewave_fast" {
try pio_comparison(@embedFile("comparison_tests/squarewave_fast.pio"));
test "pio.comparison.squarewave_wrap" {
try pio_comparison(@embedFile("comparison_tests/squarewave_wrap.pio"));
test "pio.comparison.st7789_lcd" {
try pio_comparison(@embedFile("comparison_tests/st7789_lcd.pio"));
test "pio.comparison.uart_rx" {
try pio_comparison(@embedFile("comparison_tests/uart_rx.pio"));
test "pio.comparison.uart_tx" {
try pio_comparison(@embedFile("comparison_tests/uart_tx.pio"));
test "pio.comparison.ws2812" {
try pio_comparison(@embedFile("comparison_tests/ws2812.pio"));

@ -0,0 +1,4 @@
= PIO example programs for testing
These were all taken from https://github.com/raspberrypi/pico-examples[the official pico examples repo].
The headers are generated using `pioasm`.

@ -0,0 +1,33 @@
; Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
; SPDX-License-Identifier: BSD-3-Clause
.program addition
; Pop two 32 bit integers from the TX FIFO, add them together, and push the
; result to the RX FIFO. Autopush/pull should be disabled as we're using
; explicit push and pull instructions.
; This program uses the two's complement identity x + y == ~(~x - y)
mov x, ~osr
mov y, osr
jmp test ; this loop is equivalent to the following C code:
incr: ; while (y--)
jmp x-- test ; x--;
test: ; This has the effect of subtracting y from x, eventually.
jmp y-- incr
mov isr, ~x
% c-sdk {
static inline void addition_program_init(PIO pio, uint sm, uint offset) {
pio_sm_config c = addition_program_get_default_config(offset);
pio_sm_init(pio, sm, offset, &c);
pio_sm_set_enabled(pio, sm, true);

@ -0,0 +1,52 @@
// -------------------------------------------------- //
// This file is autogenerated by pioasm; do not edit! //
// -------------------------------------------------- //
#pragma once
#include "hardware/pio.h"
// -------- //
// addition //
// -------- //
#define addition_wrap_target 0
#define addition_wrap 8
static const uint16_t addition_program_instructions[] = {
// .wrap_target
0x80a0, // 0: pull block
0xa02f, // 1: mov x, !osr
0x80a0, // 2: pull block
0xa047, // 3: mov y, osr
0x0006, // 4: jmp 6
0x0046, // 5: jmp x--, 6
0x0085, // 6: jmp y--, 5
0xa0c9, // 7: mov isr, !x
0x8020, // 8: push block
// .wrap
static const struct pio_program addition_program = {
.instructions = addition_program_instructions,
.length = 9,
.origin = -1,
static inline pio_sm_config addition_program_get_default_config(uint offset) {
pio_sm_config c = pio_get_default_sm_config();
sm_config_set_wrap(&c, offset + addition_wrap_target, offset + addition_wrap);
return c;
static inline void addition_program_init(PIO pio, uint sm, uint offset) {
pio_sm_config c = addition_program_get_default_config(offset);
pio_sm_init(pio, sm, offset, &c);
pio_sm_set_enabled(pio, sm, true);

@ -0,0 +1,89 @@
; Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
; SPDX-License-Identifier: BSD-3-Clause
.program apa102_mini
.side_set 1
; This is really just a TX-only SPI. CLK is side-set pin 0, DIN is OUT pin 0.
; Autopull enabled, threshold 32.
; Every word (32 bits) written to the FIFO will be shifted out in its entirety, MSB-first.
out pins, 1 side 0 ; Stall here when no data (still asserts clock low)
nop side 1
% c-sdk {
#include "hardware/clocks.h"
static inline void apa102_mini_program_init(PIO pio, uint sm, uint offset,
uint baud, uint pin_clk, uint pin_din) {
pio_sm_set_pins_with_mask(pio, sm, 0, (1u << pin_clk) | (1u << pin_din));
pio_sm_set_pindirs_with_mask(pio, sm, ~0u, (1u << pin_clk) | (1u << pin_din));
pio_gpio_init(pio, pin_clk);
pio_gpio_init(pio, pin_din);
pio_sm_config c = apa102_mini_program_get_default_config(offset);
sm_config_set_out_pins(&c, pin_din, 1);
sm_config_set_sideset_pins(&c, pin_clk);
// Shift to left (MSB-first), autopull with threshold 32
sm_config_set_out_shift(&c, false, true, 32);
// Deeper FIFO as we're not doing any RX
sm_config_set_fifo_join(&c, PIO_FIFO_JOIN_TX);
// We transmit 1 bit every 2 execution cycles
float div = (float)clock_get_hz(clk_sys) / (2 * baud);
sm_config_set_clkdiv(&c, div);
pio_sm_init(pio, sm, offset, &c);
pio_sm_set_enabled(pio, sm, true);
.program apa102_rgb555
; Alternative program to unpack two RGB555 pixels from a FIFO word and transmit.
; This makes it easier to DMA large buffers without processor involvement.
; OSR: shift to right
; ISR: shift to right
; To set brightness, set ISR to bit-reverse of 5-bit brightness,
; followed by 111. (00...00_b0b1b2b3b4_111)
; DMA pixel format is 0RRRRRGGGGGBBBBB x2 (15 bpp, 2px per FIFO word)
; APA102 command structure:
; increasing time ---->>
; | byte 3 | byte 2 | byte 1 | byte 0 |
; |7 0|7 0|7 0|7 0|
; -------------------------------------
; Start Frame |00000000|00000000|00000000|00000000|
; Stop Frame |11111111|11111111|11111111|11111111|
public pixel_out:
; pixel_out formats an APA102 colour command in the ISR.
; bit_run shifts 32 bits out of the ISR, with clock.
pull ifempty
set x, 2
in osr, 5
out null, 5
in null, 3
jmp x-- colour_loop
in y, 8
mov isr, ::isr ; reverse for msb-first wire order
out null, 1
public bit_run:
; in isr, n rotates ISR by n bits (right rotation only)
; Use this to perform out shifts from ISR, via mov pins
set x, 31
set pins, 0
mov pins, isr [6]
set pins, 1
in isr, 1 [6]
jmp x-- bit_out

@ -0,0 +1,105 @@
// -------------------------------------------------- //
// This file is autogenerated by pioasm; do not edit! //
// -------------------------------------------------- //
#pragma once
#include "hardware/pio.h"
// ----------- //
// apa102_mini //
// ----------- //
#define apa102_mini_wrap_target 0
#define apa102_mini_wrap 1
static const uint16_t apa102_mini_program_instructions[] = {
// .wrap_target
0x6001, // 0: out pins, 1 side 0
0xb042, // 1: nop side 1
// .wrap
static const struct pio_program apa102_mini_program = {
.instructions = apa102_mini_program_instructions,
.length = 2,
.origin = -1,
static inline pio_sm_config apa102_mini_program_get_default_config(uint offset) {
pio_sm_config c = pio_get_default_sm_config();
sm_config_set_wrap(&c, offset + apa102_mini_wrap_target, offset + apa102_mini_wrap);
sm_config_set_sideset(&c, 1, false, false);
return c;
#include "hardware/clocks.h"
static inline void apa102_mini_program_init(PIO pio, uint sm, uint offset,
uint baud, uint pin_clk, uint pin_din) {
pio_sm_set_pins_with_mask(pio, sm, 0, (1u << pin_clk) | (1u << pin_din));
pio_sm_set_pindirs_with_mask(pio, sm, ~0u, (1u << pin_clk) | (1u << pin_din));
pio_gpio_init(pio, pin_clk);
pio_gpio_init(pio, pin_din);
pio_sm_config c = apa102_mini_program_get_default_config(offset);
sm_config_set_out_pins(&c, pin_din, 1);
sm_config_set_sideset_pins(&c, pin_clk);
// Shift to right, autopull with threshold 32
sm_config_set_out_shift(&c, false, true, 32);
// Deeper FIFO as we're not doing any RX
sm_config_set_fifo_join(&c, PIO_FIFO_JOIN_TX);
// We transmit 1 bit every 2 execution cycles
float div = (float)clock_get_hz(clk_sys) / (2 * baud);
sm_config_set_clkdiv(&c, div);
pio_sm_init(pio, sm, offset, &c);
pio_sm_set_enabled(pio, sm, true);
// ------------- //
// apa102_rgb555 //
// ------------- //
#define apa102_rgb555_wrap_target 0
#define apa102_rgb555_wrap 14
#define apa102_rgb555_offset_pixel_out 0u
#define apa102_rgb555_offset_bit_run 9u
static const uint16_t apa102_rgb555_program_instructions[] = {
// .wrap_target
0x80e0, // 0: pull ifempty block
0xe022, // 1: set x, 2
0x40e5, // 2: in osr, 5
0x6065, // 3: out null, 5
0x4063, // 4: in null, 3
0x0042, // 5: jmp x--, 2
0x4048, // 6: in y, 8
0xa0d6, // 7: mov isr, ::isr
0x6061, // 8: out null, 1
0xe03f, // 9: set x, 31
0xe000, // 10: set pins, 0
0xa606, // 11: mov pins, isr [6]
0xe001, // 12: set pins, 1
0x46c1, // 13: in isr, 1 [6]
0x004a, // 14: jmp x--, 10
// .wrap
static const struct pio_program apa102_rgb555_program = {
.instructions = apa102_rgb555_program_instructions,
.length = 15,
.origin = -1,
static inline pio_sm_config apa102_rgb555_program_get_default_config(uint offset) {
pio_sm_config c = pio_get_default_sm_config();
sm_config_set_wrap(&c, offset + apa102_rgb555_wrap_target, offset + apa102_rgb555_wrap);
return c;

@ -0,0 +1,34 @@
; Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
; SPDX-License-Identifier: BSD-3-Clause
; SET pin 0 should be mapped to your LED GPIO
.program blink
pull block
out y, 32
mov x, y
set pins, 1 ; Turn LED on
jmp x-- lp1 ; Delay for (x + 1) cycles, x is a 32 bit number
mov x, y
set pins, 0 ; Turn LED off
jmp x-- lp2 ; Delay for the same number of cycles again
.wrap ; Blink forever!
% c-sdk {
// this is a raw helper function for use by the user which sets up the GPIO output, and configures the SM to output on a particular pin
void blink_program_init(PIO pio, uint sm, uint offset, uint pin) {
pio_gpio_init(pio, pin);
pio_sm_set_consecutive_pindirs(pio, sm, pin, 1, true);
pio_sm_config c = blink_program_get_default_config(offset);
sm_config_set_set_pins(&c, pin, 1);
pio_sm_init(pio, sm, offset, &c);

@ -0,0 +1,54 @@
// -------------------------------------------------- //
// This file is autogenerated by pioasm; do not edit! //
// -------------------------------------------------- //
#pragma once
#include "hardware/pio.h"
// ----- //
// blink //
// ----- //
#define blink_wrap_target 2
#define blink_wrap 7
static const uint16_t blink_program_instructions[] = {
0x80a0, // 0: pull block
0x6040, // 1: out y, 32
// .wrap_target
0xa022, // 2: mov x, y
0xe001, // 3: set pins, 1
0x0044, // 4: jmp x--, 4
0xa022, // 5: mov x, y
0xe000, // 6: set pins, 0
0x0047, // 7: jmp x--, 7
// .wrap
static const struct pio_program blink_program = {
.instructions = blink_program_instructions,
.length = 8,
.origin = -1,
static inline pio_sm_config blink_program_get_default_config(uint offset) {
pio_sm_config c = pio_get_default_sm_config();
sm_config_set_wrap(&c, offset + blink_wrap_target, offset + blink_wrap);
return c;
// this is a raw helper function for use by the user which sets up the GPIO output, and configures the SM to output on a particular pin
void blink_program_init(PIO pio, uint sm, uint offset, uint pin) {
pio_gpio_init(pio, pin);
pio_sm_set_consecutive_pindirs(pio, sm, pin, 1, true);
pio_sm_config c = blink_program_get_default_config(offset);
sm_config_set_set_pins(&c, pin, 1);
pio_sm_init(pio, sm, offset, &c);

@ -0,0 +1,51 @@
; Copyright (c) 2021 Raspberry Pi (Trading) Ltd.
; SPDX-License-Identifier: BSD-3-Clause
.program clocked_input
; Sample bits using an external clock, and push groups of bits into the RX FIFO.
; - IN pin 0 is the data pin
; - IN pin 1 is the clock pin
; - Autopush is enabled, threshold 8
; This program samples data with each rising clock edge (like mode 0 or mode 3
; SPI). The data is actually sampled one system clock cycle after the rising
; edge is observed, so a clock ratio of at least input_clk < clk_sys / 6 is
; recommended for good sampling alignment.
wait 0 pin 1
wait 1 pin 1
in pins, 1
% c-sdk {
static inline void clocked_input_program_init(PIO pio, uint sm, uint offset, uint pin) {
pio_sm_config c = clocked_input_program_get_default_config(offset);
// Set the IN base pin to the provided `pin` parameter. This is the data
// pin, and the next-numbered GPIO is used as the clock pin.
sm_config_set_in_pins(&c, pin);
// Set the pin directions to input at the PIO
pio_sm_set_consecutive_pindirs(pio, sm, pin, 2, false);
// Connect these GPIOs to this PIO block
pio_gpio_init(pio, pin);
pio_gpio_init(pio, pin + 1);
// Shifting to left matches the customary MSB-first ordering of SPI.
false, // Shift-to-right = false (i.e. shift to left)
true, // Autopush enabled
8 // Autopush threshold = 8
// We only receive, so disable the TX FIFO to make the RX FIFO deeper.
sm_config_set_fifo_join(&c, PIO_FIFO_JOIN_RX);
// Load our configuration, and start the program from the beginning
pio_sm_init(pio, sm, offset, &c);
pio_sm_set_enabled(pio, sm, true);

@ -0,0 +1,64 @@
// -------------------------------------------------- //
// This file is autogenerated by pioasm; do not edit! //
// -------------------------------------------------- //
#pragma once
#include "hardware/pio.h"
// ------------- //
// clocked_input //
// ------------- //
#define clocked_input_wrap_target 0
#define clocked_input_wrap 2
static const uint16_t clocked_input_program_instructions[] = {
// .wrap_target
0x2021, // 0: wait 0 pin, 1
0x20a1, // 1: wait 1 pin, 1
0x4001, // 2: in pins, 1
// .wrap
static const struct pio_program clocked_input_program = {
.instructions = clocked_input_program_instructions,
.length = 3,
.origin = -1,
static inline pio_sm_config clocked_input_program_get_default_config(uint offset) {
pio_sm_config c = pio_get_default_sm_config();
sm_config_set_wrap(&c, offset + clocked_input_wrap_target, offset + clocked_input_wrap);
return c;
static inline void clocked_input_program_init(PIO pio, uint sm, uint offset, uint pin) {
pio_sm_config c = clocked_input_program_get_default_config(offset);
// Set the IN base pin to the provided `pin` parameter. This is the data
// pin, and the next-numbered GPIO is used as the clock pin.
sm_config_set_in_pins(&c, pin);
// Set the pin directions to input at the PIO
pio_sm_set_consecutive_pindirs(pio, sm, pin, 2, false);
// Connect these GPIOs to this PIO block
pio_gpio_init(pio, pin);
pio_gpio_init(pio, pin + 1);
// Shifting to left matches the customary MSB-first ordering of SPI.
false, // Shift-to-right = false (i.e. shift to left)
true, // Autopush enabled
8 // Autopush threshold = 8
// We only receive, so disable the TX FIFO to make the RX FIFO deeper.
sm_config_set_fifo_join(&c, PIO_FIFO_JOIN_RX);
// Load our configuration, and start the program from the beginning
pio_sm_init(pio, sm, offset, &c);
pio_sm_set_enabled(pio, sm, true);

@ -0,0 +1,104 @@
; Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
; SPDX-License-Identifier: BSD-3-Clause
.program differential_manchester_tx
.side_set 1 opt
; Transmit one bit every 16 cycles. In each bit period:
; - A '0' is encoded as a transition at the start of the bit period
; - A '1' is encoded as a transition at the start *and* in the middle
; Side-set bit 0 must be mapped to the data output pin.
; Autopull must be enabled.
public start:
out x, 1 ; Start of bit period: always assert transition
jmp !x high_0 side 1 [6] ; Test the data bit we just shifted out of OSR
jmp initial_high side 0 [6] ; For `1` bits, also transition in the middle
jmp initial_low [7] ; Otherwise, the line is stable in the middle
out x, 1 ; Always shift 1 bit from OSR to X so we can
jmp !x low_0 side 0 [6] ; branch on it. Autopull refills OSR for us.
jmp initial_low side 1 [6] ; If there are two transitions, return to
jmp initial_high [7] ; the initial line state is flipped!
% c-sdk {
static inline void differential_manchester_tx_program_init(PIO pio, uint sm, uint offset, uint pin, float div) {
pio_sm_set_pins_with_mask(pio, sm, 0, 1u << pin);
pio_sm_set_consecutive_pindirs(pio, sm, pin, 1, true);
pio_gpio_init(pio, pin);
pio_sm_config c = differential_manchester_tx_program_get_default_config(offset);
sm_config_set_sideset_pins(&c, pin);
sm_config_set_out_shift(&c, true, true, 32);
sm_config_set_fifo_join(&c, PIO_FIFO_JOIN_TX);
sm_config_set_clkdiv(&c, div);
pio_sm_init(pio, sm, offset + differential_manchester_tx_offset_start, &c);
// Execute a blocking pull so that we maintain the initial line state until data is available
pio_sm_exec(pio, sm, pio_encode_pull(false, true));
pio_sm_set_enabled(pio, sm, true);
.program differential_manchester_rx
; Assumes line is idle low
; One bit is 16 cycles. In each bit period:
; - A '0' is encoded as a transition at time 0
; - A '1' is encoded as a transition at time 0 and a transition at time T/2
; The IN mapping and the JMP pin select must both be mapped to the GPIO used for
; RX data. Autopush must be enabled.
public start:
initial_high: ; Find rising edge at start of bit period
wait 1 pin, 0 [11] ; Delay to eye of second half-period (i.e 3/4 of way
jmp pin high_0 ; through bit) and branch on RX pin high/low.
in x, 1 ; Second transition detected (a `1` data symbol)
jmp initial_high
in y, 1 [1] ; Line still high, no centre transition (data is `0`)
; Fall-through
initial_low: ; Find falling edge at start of bit period
wait 0 pin, 0 [11] ; Delay to eye of second half-period
jmp pin low_1
in y, 1 ; Line still low, no centre transition (data is `0`)
jmp initial_high
low_1: ; Second transition detected (data is `1`)
in x, 1 [1]
% c-sdk {
static inline void differential_manchester_rx_program_init(PIO pio, uint sm, uint offset, uint pin, float div) {
pio_sm_set_consecutive_pindirs(pio, sm, pin, 1, false);
pio_gpio_init(pio, pin);
pio_sm_config c = differential_manchester_rx_program_get_default_config(offset);
sm_config_set_in_pins(&c, pin); // for WAIT
sm_config_set_jmp_pin(&c, pin); // for JMP
sm_config_set_in_shift(&c, true, true, 32);
sm_config_set_fifo_join(&c, PIO_FIFO_JOIN_RX);
sm_config_set_clkdiv(&c, div);
pio_sm_init(pio, sm, offset, &c);
// X and Y are set to 1 and 0, to conveniently emit these to ISR/FIFO.
pio_sm_exec(pio, sm, pio_encode_set(pio_x, 1));
pio_sm_exec(pio, sm, pio_encode_set(pio_y, 0));
pio_sm_set_enabled(pio, sm, true);

@ -0,0 +1,120 @@
// -------------------------------------------------- //
// This file is autogenerated by pioasm; do not edit! //
// -------------------------------------------------- //
#pragma once
#include "hardware/pio.h"
// -------------------------- //
// differential_manchester_tx //
// -------------------------- //
#define differential_manchester_tx_wrap_target 0
#define differential_manchester_tx_wrap 9
#define differential_manchester_tx_offset_start 0u
static const uint16_t differential_manchester_tx_program_instructions[] = {
// .wrap_target
0x6021, // 0: out x, 1
0x1e24, // 1: jmp !x, 4 side 1 [6]
0xa042, // 2: nop
0x1600, // 3: jmp 0 side 0 [6]
0x0705, // 4: jmp 5 [7]
0x6021, // 5: out x, 1
0x1629, // 6: jmp !x, 9 side 0 [6]
0xa042, // 7: nop
0x1e05, // 8: jmp 5 side 1 [6]
0x0700, // 9: jmp 0 [7]
// .wrap
static const struct pio_program differential_manchester_tx_program = {
.instructions = differential_manchester_tx_program_instructions,
.length = 10,
.origin = -1,
static inline pio_sm_config differential_manchester_tx_program_get_default_config(uint offset) {
pio_sm_config c = pio_get_default_sm_config();
sm_config_set_wrap(&c, offset + differential_manchester_tx_wrap_target, offset + differential_manchester_tx_wrap);
sm_config_set_sideset(&c, 2, true, false);
return c;
static inline void differential_manchester_tx_program_init(PIO pio, uint sm, uint offset, uint pin, float div) {
pio_sm_set_pins_with_mask(pio, sm, 0, 1u << pin);
pio_sm_set_consecutive_pindirs(pio, sm, pin, 1, true);
pio_gpio_init(pio, pin);
pio_sm_config c = differential_manchester_tx_program_get_default_config(offset);
sm_config_set_sideset_pins(&c, pin);
sm_config_set_out_shift(&c, true, true, 32);
sm_config_set_fifo_join(&c, PIO_FIFO_JOIN_TX);
sm_config_set_clkdiv(&c, div);
pio_sm_init(pio, sm, offset + differential_manchester_tx_offset_start, &c);
// Execute a blocking pull so that we maintain the initial line state until data is available
pio_sm_exec(pio, sm, pio_encode_pull(false, true));
pio_sm_set_enabled(pio, sm, true);
// -------------------------- //
// differential_manchester_rx //
// -------------------------- //
#define differential_manchester_rx_wrap_target 5
#define differential_manchester_rx_wrap 9
#define differential_manchester_rx_offset_start 0u
static const uint16_t differential_manchester_rx_program_instructions[] = {
0x2ba0, // 0: wait 1 pin, 0 [11]
0x00c4, // 1: jmp pin, 4
0x4021, // 2: in x, 1
0x0000, // 3: jmp 0
0x4141, // 4: in y, 1 [1]
// .wrap_target
0x2b20, // 5: wait 0 pin, 0 [11]
0x00c9, // 6: jmp pin, 9
0x4041, // 7: in y, 1
0x0000, // 8: jmp 0
0x4121, // 9: in x, 1 [1]
// .wrap
static const struct pio_program differential_manchester_rx_program = {
.instructions = differential_manchester_rx_program_instructions,
.length = 10,
.origin = -1,
static inline pio_sm_config differential_manchester_rx_program_get_default_config(uint offset) {
pio_sm_config c = pio_get_default_sm_config();
sm_config_set_wrap(&c, offset + differential_manchester_rx_wrap_target, offset + differential_manchester_rx_wrap);
return c;
static inline void differential_manchester_rx_program_init(PIO pio, uint sm, uint offset, uint pin, float div) {
pio_sm_set_consecutive_pindirs(pio, sm, pin, 1, false);
pio_gpio_init(pio, pin);
pio_sm_config c = differential_manchester_rx_program_get_default_config(offset);
sm_config_set_in_pins(&c, pin); // for WAIT
sm_config_set_jmp_pin(&c, pin); // for JMP
sm_config_set_in_shift(&c, true, true, 32);
sm_config_set_fifo_join(&c, PIO_FIFO_JOIN_RX);
sm_config_set_clkdiv(&c, div);
pio_sm_init(pio, sm, offset, &c);
// X and Y are set to 0 and 1, to conveniently emit these to ISR/FIFO.
pio_sm_exec(pio, sm, pio_encode_set(pio_x, 1));
pio_sm_exec(pio, sm, pio_encode_set(pio_y, 0));
pio_sm_set_enabled(pio, sm, true);

@ -0,0 +1,34 @@
; Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
; SPDX-License-Identifier: BSD-3-Clause
.program hello
; Repeatedly get one word of data from the TX FIFO, stalling when the FIFO is
; empty. Write the least significant bit to the OUT pin group.
out pins, 1
jmp loop
% c-sdk {
static inline void hello_program_init(PIO pio, uint sm, uint offset, uint pin) {
pio_sm_config c = hello_program_get_default_config(offset);
// Map the state machine's OUT pin group to one pin, namely the `pin`
// parameter to this function.
sm_config_set_out_pins(&c, pin, 1);
// Set this pin's GPIO function (connect PIO to the pad)
pio_gpio_init(pio, pin);
// Set the pin direction to output at the PIO
pio_sm_set_consecutive_pindirs(pio, sm, pin, 1, true);
// Load our configuration, and jump to the start of the program
pio_sm_init(pio, sm, offset, &c);
// Set the state machine running
pio_sm_set_enabled(pio, sm, true);

@ -0,0 +1,55 @@
// -------------------------------------------------- //
// This file is autogenerated by pioasm; do not edit! //
// -------------------------------------------------- //
#pragma once
#include "hardware/pio.h"
// ----- //
// hello //
// ----- //
#define hello_wrap_target 0
#define hello_wrap 2
static const uint16_t hello_program_instructions[] = {
// .wrap_target
0x80a0, // 0: pull block
0x6001, // 1: out pins, 1
0x0000, // 2: jmp 0
// .wrap
static const struct pio_program hello_program = {
.instructions = hello_program_instructions,
.length = 3,
.origin = -1,
static inline pio_sm_config hello_program_get_default_config(uint offset) {
pio_sm_config c = pio_get_default_sm_config();
sm_config_set_wrap(&c, offset + hello_wrap_target, offset + hello_wrap);
return c;
static inline void hello_program_init(PIO pio, uint sm, uint offset, uint pin) {
pio_sm_config c = hello_program_get_default_config(offset);
// Map the state machine's OUT pin group to one pin, namely the `pin`
// parameter to this function.
sm_config_set_out_pins(&c, pin, 1);
// Set this pin's GPIO function (connect PIO to the pad)
pio_gpio_init(pio, pin);
// Set the pin direction to output at the PIO
pio_sm_set_consecutive_pindirs(pio, sm, pin, 1, true);
// Load our configuration, and jump to the start of the program
pio_sm_init(pio, sm, offset, &c);
// Set the state machine running
pio_sm_set_enabled(pio, sm, true);

@ -0,0 +1,128 @@
; Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
; SPDX-License-Identifier: BSD-3-Clause
.program hub75_row
; side-set pin 0 is LATCH
; side-set pin 1 is OEn
; OUT pins are row select A-E
; Each FIFO record consists of:
; - 5-bit row select (LSBs)
; - Pulse width - 1 (27 MSBs)
; Repeatedly select a row, pulse LATCH, and generate a pulse of a certain
; width on OEn.
.side_set 2
out pins, 5 [7] side 0x2 ; Deassert OEn, output row select
out x, 27 [7] side 0x3 ; Pulse LATCH, get OEn pulse width
jmp x-- pulse_loop side 0x0 ; Assert OEn for x+1 cycles
% c-sdk {
static inline void hub75_row_program_init(PIO pio, uint sm, uint offset, uint row_base_pin, uint n_row_pins, uint latch_base_pin) {
pio_sm_set_consecutive_pindirs(pio, sm, row_base_pin, n_row_pins, true);
pio_sm_set_consecutive_pindirs(pio, sm, latch_base_pin, 2, true);
for (uint i = row_base_pin; i < row_base_pin + n_row_pins; ++i)
pio_gpio_init(pio, i);
pio_gpio_init(pio, latch_base_pin);
pio_gpio_init(pio, latch_base_pin + 1);
pio_sm_config c = hub75_row_program_get_default_config(offset);
sm_config_set_out_pins(&c, row_base_pin, n_row_pins);
sm_config_set_sideset_pins(&c, latch_base_pin);
sm_config_set_out_shift(&c, true, true, 32);
pio_sm_init(pio, sm, offset, &c);
pio_sm_set_enabled(pio, sm, true);
static inline void hub75_wait_tx_stall(PIO pio, uint sm) {
uint32_t txstall_mask = 1u << (PIO_FDEBUG_TXSTALL_LSB + sm);
pio->fdebug = txstall_mask;
while (!(pio->fdebug & txstall_mask))
.program hub75_data_rgb888
.side_set 1
; Each FIFO record consists of a RGB888 pixel. (This is ok for e.g. an RGB565
; source which has been gamma-corrected)
; Even pixels are sent on R0, G0, B0 and odd pixels on R1, G1, B1 (typically
; these are for different parts of the screen, NOT for adjacent pixels, so the
; frame buffer must be interleaved before passing to PIO.)
; Each pass through, we take bit n, n + 8 and n + 16 from each pixel, for n in
; {0...7}. Therefore the pixels need to be transmitted 8 times (ouch) to build
; up the full 8 bit value for each channel, and perform bit-planed PWM by
; varying pulse widths on the other state machine, in ascending powers of 2.
; This avoids a lot of bit shuffling on the processors, at the cost of DMA
; bandwidth (which we have loads of).
; Might want to close your eyes before you read this
public entry_point:
public shift0:
pull side 0 ; gets patched to `out null, n` if n nonzero (otherwise the PULL is required for fencing)
in osr, 1 side 0 ; shuffle shuffle shuffle
out null, 8 side 0
in osr, 1 side 0
out null, 8 side 0
in osr, 1 side 0
out null, 32 side 0 ; Discard remainder of OSR contents
public shift1:
pull side 0 ; gets patched to out null, n if n is nonzero (otherwise PULL required)
in osr, 1 side 1 ; Note this posedge clocks in the data from the previous iteration
out null, 8 side 1
in osr, 1 side 1
out null, 8 side 1
in osr, 1 side 1
out null, 32 side 1
in null, 26 side 1 ; Note we are just doing this little manoeuvre here to get GPIOs in the order
mov pins, ::isr side 1 ; R0, G0, B0, R1, G1, B1. Can go 1 cycle faster if reversed
; Note that because the clock edge for pixel n is in the middle of pixel n +
; 1, a dummy pixel at the end is required to clock the last piece of genuine
; data. (Also 1 pixel of garbage is clocked out at the start, but this is
; harmless)
% c-sdk {
static inline void hub75_data_rgb888_program_init(PIO pio, uint sm, uint offset, uint rgb_base_pin, uint clock_pin) {
pio_sm_set_consecutive_pindirs(pio, sm, rgb_base_pin, 6, true);
pio_sm_set_consecutive_pindirs(pio, sm, clock_pin, 1, true);
for (uint i = rgb_base_pin; i < rgb_base_pin + 6; ++i)
pio_gpio_init(pio, i);
pio_gpio_init(pio, clock_pin);
pio_sm_config c = hub75_data_rgb888_program_get_default_config(offset);
sm_config_set_out_pins(&c, rgb_base_pin, 6);
sm_config_set_sideset_pins(&c, clock_pin);
sm_config_set_out_shift(&c, true, true, 24);
// ISR shift to left. R0 ends up at bit 5. We push it up to MSB and then flip the register.
sm_config_set_in_shift(&c, false, false, 32);
sm_config_set_fifo_join(&c, PIO_FIFO_JOIN_TX);
pio_sm_init(pio, sm, offset, &c);
pio_sm_exec(pio, sm, offset + hub75_data_rgb888_offset_entry_point);
pio_sm_set_enabled(pio, sm, true);
// Patch a data program at `offset` to preshift pixels by `shamt`
static inline void hub75_data_rgb888_set_shift(PIO pio, uint sm, uint offset, uint shamt) {
uint16_t instr;
if (shamt == 0)
instr = pio_encode_pull(false, true); // blocking PULL
instr = pio_encode_out(pio_null, shamt);
pio->instr_mem[offset + hub75_data_rgb888_offset_shift0] = instr;
pio->instr_mem[offset + hub75_data_rgb888_offset_shift1] = instr;

@ -0,0 +1,138 @@
// -------------------------------------------------- //
// This file is autogenerated by pioasm; do not edit! //
// -------------------------------------------------- //
#pragma once
#include "hardware/pio.h"
// --------- //
// hub75_row //
// --------- //
#define hub75_row_wrap_target 0
#define hub75_row_wrap 2
static const uint16_t hub75_row_program_instructions[] = {
// .wrap_target
0x7705, // 0: out pins, 5 side 2 [7]
0x7f3b, // 1: out x, 27 side 3 [7]
0x0042, // 2: jmp x--, 2 side 0
// .wrap
static const struct pio_program hub75_row_program = {
.instructions = hub75_row_program_instructions,
.length = 3,
.origin = -1,
static inline pio_sm_config hub75_row_program_get_default_config(uint offset) {
pio_sm_config c = pio_get_default_sm_config();
sm_config_set_wrap(&c, offset + hub75_row_wrap_target, offset + hub75_row_wrap);
sm_config_set_sideset(&c, 2, false, false);
return c;
static inline void hub75_row_program_init(PIO pio, uint sm, uint offset, uint row_base_pin, uint n_row_pins, uint latch_base_pin) {
pio_sm_set_consecutive_pindirs(pio, sm, row_base_pin, n_row_pins, true);
pio_sm_set_consecutive_pindirs(pio, sm, latch_base_pin, 2, true);
for (uint i = row_base_pin; i < row_base_pin + n_row_pins; ++i)
pio_gpio_init(pio, i);
pio_gpio_init(pio, latch_base_pin);
pio_gpio_init(pio, latch_base_pin + 1);
pio_sm_config c = hub75_row_program_get_default_config(offset);
sm_config_set_out_pins(&c, row_base_pin, n_row_pins);
sm_config_set_sideset_pins(&c, latch_base_pin);
sm_config_set_out_shift(&c, true, true, 32);
pio_sm_init(pio, sm, offset, &c);
pio_sm_set_enabled(pio, sm, true);
// Busy-wait until state machine `sm` of `pio` raises its TXSTALL debug flag.
//
// The flag bit for `sm` is first written back to FDEBUG (NOTE(review): FDEBUG
// bits are write-1-to-clear per the RP2040 datasheet, so this discards any
// stale stall indication), then polled until the hardware sets it again.
static inline void hub75_wait_tx_stall(PIO pio, uint sm) {
    uint32_t txstall_mask = 1u << (PIO_FDEBUG_TXSTALL_LSB + sm);
    pio->fdebug = txstall_mask;
    while (!(pio->fdebug & txstall_mask))
        tight_loop_contents(); // explicit empty loop body
}
// ----------------- //
// hub75_data_rgb888 //
// ----------------- //
#define hub75_data_rgb888_wrap_target 0
#define hub75_data_rgb888_wrap 15
#define hub75_data_rgb888_offset_entry_point 0u
#define hub75_data_rgb888_offset_shift0 0u
#define hub75_data_rgb888_offset_shift1 7u
static const uint16_t hub75_data_rgb888_program_instructions[] = {
// .wrap_target
0x80a0, // 0: pull block side 0
0x40e1, // 1: in osr, 1 side 0
0x6068, // 2: out null, 8 side 0
0x40e1, // 3: in osr, 1 side 0
0x6068, // 4: out null, 8 side 0
0x40e1, // 5: in osr, 1 side 0
0x6060, // 6: out null, 32 side 0
0x80a0, // 7: pull block side 0
0x50e1, // 8: in osr, 1 side 1
0x7068, // 9: out null, 8 side 1
0x50e1, // 10: in osr, 1 side 1
0x7068, // 11: out null, 8 side 1
0x50e1, // 12: in osr, 1 side 1
0x7060, // 13: out null, 32 side 1
0x507a, // 14: in null, 26 side 1
0xb016, // 15: mov pins, ::isr side 1
// .wrap
static const struct pio_program hub75_data_rgb888_program = {
.instructions = hub75_data_rgb888_program_instructions,
.length = 16,
.origin = -1,
static inline pio_sm_config hub75_data_rgb888_program_get_default_config(uint offset) {
pio_sm_config c = pio_get_default_sm_config();
sm_config_set_wrap(&c, offset + hub75_data_rgb888_wrap_target, offset + hub75_data_rgb888_wrap);
sm_config_set_sideset(&c, 1, false, false);
return c;
static inline void hub75_data_rgb888_program_init(PIO pio, uint sm, uint offset, uint rgb_base_pin, uint clock_pin) {
pio_sm_set_consecutive_pindirs(pio, sm, rgb_base_pin, 6, true);
pio_sm_set_consecutive_pindirs(pio, sm, clock_pin, 1, true);
for (uint i = rgb_base_pin; i < rgb_base_pin + 6; ++i)
pio_gpio_init(pio, i);
pio_gpio_init(pio, clock_pin);
pio_sm_config c = hub75_data_rgb888_program_get_default_config(offset);
sm_config_set_out_pins(&c, rgb_base_pin, 6);
sm_config_set_sideset_pins(&c, clock_pin);
sm_config_set_out_shift(&c, true, true, 24);
// ISR shift to left. R0 ends up at bit 5. We push it up to MSB and then flip the register.
sm_config_set_in_shift(&c, false, false, 32);
sm_config_set_fifo_join(&c, PIO_FIFO_JOIN_TX);
pio_sm_init(pio, sm, offset, &c);
pio_sm_exec(pio, sm, offset + hub75_data_rgb888_offset_entry_point);
pio_sm_set_enabled(pio, sm, true);
// Patch a data program at `offset` to preshift pixels by `shamt`.
//
// Both patch slots (hub75_data_rgb888_offset_shift0 = 0 and
// hub75_data_rgb888_offset_shift1 = 7, the two `pull block` instructions of
// the program) are overwritten with the same instruction:
//   shamt == 0 -> a blocking PULL (the program's original instruction)
//   otherwise  -> OUT NULL, shamt
// `sm` is accepted for signature symmetry but is not used here.
static inline void hub75_data_rgb888_set_shift(PIO pio, uint sm, uint offset, uint shamt) {
    uint16_t instr;
    if (shamt == 0)
        instr = pio_encode_pull(false, true); // blocking PULL
    else
        // Without this `else` the PULL encoding above would always be
        // overwritten, turning the shamt == 0 case into `OUT NULL, 0`.
        instr = pio_encode_out(pio_null, shamt);
    pio->instr_mem[offset + hub75_data_rgb888_offset_shift0] = instr;
    pio->instr_mem[offset + hub75_data_rgb888_offset_shift1] = instr;
}

@ -0,0 +1,145 @@
; Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
; SPDX-License-Identifier: BSD-3-Clause
.program i2c
.side_set 1 opt pindirs

; TX Encoding:
; | 15:10 | 9     | 8:1  | 0   |
; | Instr | Final | Data | NAK |
;
; If Instr has a value n > 0, then this FIFO word has no
; data payload, and the next n + 1 words will be executed as instructions.
; Otherwise, shift out the 8 data bits, followed by the ACK bit.
;
; The Instr mechanism allows stop/start/repstart sequences to be programmed
; by the processor, and then carried out by the state machine at defined points
; in the datastream.
;
; The "Final" field should be set for the final byte in a transfer.
; This tells the state machine to ignore a NAK: if this field is not
; set, then any NAK will cause the state machine to halt and interrupt.
;
; Autopull should be enabled, with a threshold of 16.
; Autopush should be enabled, with a threshold of 8.
; The TX FIFO should be accessed with halfword writes, to ensure
; the data is immediately available in the OSR.
;
; Pin mapping:
; - Input pin 0 is SDA, 1 is SCL (if clock stretching used)
; - Jump pin is SDA
; - Side-set pin 0 is SCL
; - Set pin 0 is SDA
; - OUT pin 0 is SDA
; - SCL must be SDA + 1 (for wait mapping)
;
; The OE outputs should be inverted in the system IO controls!
; (It's possible for the inversion to be done in this program,
; but costs 2 instructions: 1 for inversion, and one to cope
; with the side effect of the MOV on TX shift counter.)

do_nack:
    jmp y-- entry_point        ; Continue if NAK was expected
    irq wait 0 rel             ; Otherwise stop, ask for help

do_byte:
    set x, 7                   ; Loop 8 times
bitloop:
    out pindirs, 1         [7] ; Serialise write data (all-ones if reading)
    nop             side 1 [2] ; SCL rising edge
    wait 1 pin, 1          [4] ; Allow clock to be stretched
    in pins, 1             [7] ; Sample read data in middle of SCL pulse
    jmp x-- bitloop side 0 [7] ; SCL falling edge

    ; Handle ACK pulse
    out pindirs, 1         [7] ; On reads, we provide the ACK.
    nop             side 1 [7] ; SCL rising edge
    wait 1 pin, 1          [7] ; Allow clock to be stretched
    jmp pin do_nack side 0 [2] ; Test SDA for ACK/NAK, fall through if ACK

public entry_point:
.wrap_target
    out x, 6                   ; Unpack Instr count
    out y, 1                   ; Unpack the NAK ignore bit
    jmp !x do_byte             ; Instr == 0, this is a data record.
    out null, 32               ; Instr > 0, remainder of this OSR is invalid
do_exec:
    out exec, 16               ; Execute one instruction per FIFO word
    jmp x-- do_exec            ; Repeat n + 1 times
.wrap
% c-sdk {
#include "hardware/clocks.h"
#include "hardware/gpio.h"
static inline void i2c_program_init(PIO pio, uint sm, uint offset, uint pin_sda, uint pin_scl) {
assert(pin_scl == pin_sda + 1);
pio_sm_config c = i2c_program_get_default_config(offset);
// IO mapping
sm_config_set_out_pins(&c, pin_sda, 1);
sm_config_set_set_pins(&c, pin_sda, 1);
sm_config_set_in_pins(&c, pin_sda);
sm_config_set_sideset_pins(&c, pin_scl);
sm_config_set_jmp_pin(&c, pin_sda);
sm_config_set_out_shift(&c, false, true, 16);
sm_config_set_in_shift(&c, false, true, 8);
float div = (float)clock_get_hz(clk_sys) / (32 * 100000);
sm_config_set_clkdiv(&c, div);
// Try to avoid glitching the bus while connecting the IOs. Get things set
// up so that pin is driven down when PIO asserts OE low, and pulled up
// otherwise.
uint32_t both_pins = (1u << pin_sda) | (1u << pin_scl);
pio_sm_set_pins_with_mask(pio, sm, both_pins, both_pins);
pio_sm_set_pindirs_with_mask(pio, sm, both_pins, both_pins);
pio_gpio_init(pio, pin_sda);
gpio_set_oeover(pin_sda, GPIO_OVERRIDE_INVERT);
pio_gpio_init(pio, pin_scl);
gpio_set_oeover(pin_scl, GPIO_OVERRIDE_INVERT);
pio_sm_set_pins_with_mask(pio, sm, 0, both_pins);
// Clear IRQ flag before starting, and make sure flag doesn't actually
// assert a system-level interrupt (we're using it as a status flag)
pio_set_irq0_source_enabled(pio, pis_interrupt0 + sm, false);
pio_set_irq1_source_enabled(pio, pis_interrupt0 + sm, false);
pio_interrupt_clear(pio, sm);
// Configure and start SM
pio_sm_init(pio, sm, offset + i2c_offset_entry_point, &c);
pio_sm_set_enabled(pio, sm, true);
.program set_scl_sda
.side_set 1 opt
; Assemble a table of instructions which software can select from, and pass
; into the FIFO, to issue START/STOP/RSTART. This isn't intended to be run as
; a complete program.
set pindirs, 0 side 0 [7] ; SCL = 0, SDA = 0
set pindirs, 1 side 0 [7] ; SCL = 0, SDA = 1
set pindirs, 0 side 1 [7] ; SCL = 1, SDA = 0
set pindirs, 1 side 1 [7] ; SCL = 1, SDA = 1
% c-sdk {
// Define order of our instruction table
enum {
I2C_SC0_SD0 = 0,

@ -0,0 +1,136 @@
// -------------------------------------------------- //
// This file is autogenerated by pioasm; do not edit! //
// -------------------------------------------------- //
#pragma once
#include "hardware/pio.h"
// --- //
// i2c //
// --- //
#define i2c_wrap_target 12
#define i2c_wrap 17
#define i2c_offset_entry_point 12u
static const uint16_t i2c_program_instructions[] = {
0x008c, // 0: jmp y--, 12
0xc030, // 1: irq wait 0 rel
0xe027, // 2: set x, 7
0x6781, // 3: out pindirs, 1 [7]
0xba42, // 4: nop side 1 [2]
0x24a1, // 5: wait 1 pin, 1 [4]
0x4701, // 6: in pins, 1 [7]
0x1743, // 7: jmp x--, 3 side 0 [7]
0x6781, // 8: out pindirs, 1 [7]
0xbf42, // 9: nop side 1 [7]
0x27a1, // 10: wait 1 pin, 1 [7]
0x12c0, // 11: jmp pin, 0 side 0 [2]
// .wrap_target
0x6026, // 12: out x, 6
0x6041, // 13: out y, 1
0x0022, // 14: jmp !x, 2
0x6060, // 15: out null, 32
0x60f0, // 16: out exec, 16
0x0050, // 17: jmp x--, 16
// .wrap
static const struct pio_program i2c_program = {
.instructions = i2c_program_instructions,
.length = 18,
.origin = -1,
static inline pio_sm_config i2c_program_get_default_config(uint offset) {
pio_sm_config c = pio_get_default_sm_config();
sm_config_set_wrap(&c, offset + i2c_wrap_target, offset + i2c_wrap);
sm_config_set_sideset(&c, 2, true, true);
return c;
#include "hardware/clocks.h"
#include "hardware/gpio.h"
static inline void i2c_program_init(PIO pio, uint sm, uint offset, uint pin_sda, uint pin_scl) {
assert(pin_scl == pin_sda + 1);
pio_sm_config c = i2c_program_get_default_config(offset);
// IO mapping
sm_config_set_out_pins(&c, pin_sda, 1);
sm_config_set_set_pins(&c, pin_sda, 1);
sm_config_set_in_pins(&c, pin_sda);
sm_config_set_sideset_pins(&c, pin_scl);
sm_config_set_jmp_pin(&c, pin_sda);
sm_config_set_out_shift(&c, false, true, 16);
sm_config_set_in_shift(&c, false, true, 8);
float div = (float)clock_get_hz(clk_sys) / (32 * 100000);
sm_config_set_clkdiv(&c, div);
// Try to avoid glitching the bus while connecting the IOs. Get things set
// up so that pin is driven down when PIO asserts OE low, and pulled up
// otherwise.
uint32_t both_pins = (1u << pin_sda) | (1u << pin_scl);
pio_sm_set_pins_with_mask(pio, sm, both_pins, both_pins);
pio_sm_set_pindirs_with_mask(pio, sm, both_pins, both_pins);
pio_gpio_init(pio, pin_sda);
gpio_set_oeover(pin_sda, GPIO_OVERRIDE_INVERT);
pio_gpio_init(pio, pin_scl);
gpio_set_oeover(pin_scl, GPIO_OVERRIDE_INVERT);
pio_sm_set_pins_with_mask(pio, sm, 0, both_pins);
// Clear IRQ flag before starting, and make sure flag doesn't actually
// assert a system-level interrupt (we're using it as a status flag)
pio_set_irq0_source_enabled(pio, pis_interrupt0 + sm, false);
pio_set_irq1_source_enabled(pio, pis_interrupt0 + sm, false);
pio_interrupt_clear(pio, sm);
// Configure and start SM
pio_sm_init(pio, sm, offset + i2c_offset_entry_point, &c);
pio_sm_set_enabled(pio, sm, true);
// ----------- //
// set_scl_sda //
// ----------- //
#define set_scl_sda_wrap_target 0
#define set_scl_sda_wrap 3
static const uint16_t set_scl_sda_program_instructions[] = {
// .wrap_target
0xf780, // 0: set pindirs, 0 side 0 [7]
0xf781, // 1: set pindirs, 1 side 0 [7]
0xff80, // 2: set pindirs, 0 side 1 [7]
0xff81, // 3: set pindirs, 1 side 1 [7]
// .wrap
static const struct pio_program set_scl_sda_program = {
.instructions = set_scl_sda_program_instructions,
.length = 4,
.origin = -1,
static inline pio_sm_config set_scl_sda_program_get_default_config(uint offset) {
pio_sm_config c = pio_get_default_sm_config();
sm_config_set_wrap(&c, offset + set_scl_sda_wrap_target, offset + set_scl_sda_wrap);
sm_config_set_sideset(&c, 2, true, false);
return c;
// Define order of our instruction table
enum {
I2C_SC0_SD0 = 0,

@ -0,0 +1,94 @@
; Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
; SPDX-License-Identifier: BSD-3-Clause
.program manchester_tx
.side_set 1 opt

; Transmit one bit every 12 cycles. a '0' is encoded as a high-low sequence
; (each part lasting half a bit period, or 6 cycles) and a '1' is encoded as a
; low-high sequence.
;
; Side-set bit 0 must be mapped to the GPIO used for TX.
; Autopull must be enabled -- this program does not care about the threshold.
; The program starts at the public label 'start'.

do_1:                      ; Reached by wrapping round when X != 0
    nop         side 0 [5] ; Low for 6 cycles (5 delay, +1 for nop)
    jmp get_bit side 1 [3] ; High for 4 cycles. 'get_bit' takes another 2 cycles
do_0:
    nop         side 1 [5] ; Output high for 6 cycles
    nop         side 0 [3] ; Output low for 4 cycles
public start:
get_bit:
    out x, 1               ; Always shift out one bit from OSR to X, so we can
    jmp !x do_0            ; branch on it. Autopull refills the OSR when empty.
% c-sdk {
static inline void manchester_tx_program_init(PIO pio, uint sm, uint offset, uint pin, float div) {
pio_sm_set_pins_with_mask(pio, sm, 0, 1u << pin);
pio_sm_set_consecutive_pindirs(pio, sm, pin, 1, true);
pio_gpio_init(pio, pin);
pio_sm_config c = manchester_tx_program_get_default_config(offset);
sm_config_set_sideset_pins(&c, pin);
sm_config_set_out_shift(&c, true, true, 32);
sm_config_set_fifo_join(&c, PIO_FIFO_JOIN_TX);
sm_config_set_clkdiv(&c, div);
pio_sm_init(pio, sm, offset + manchester_tx_offset_start, &c);
pio_sm_set_enabled(pio, sm, true);
.program manchester_rx

; Assumes line is idle low, first bit is 0
; One bit is 12 cycles
; a '0' is encoded as 10
; a '1' is encoded as 01
;
; Both the IN base and the JMP pin mapping must be pointed at the GPIO used for RX.
; Autopush must be enabled.
; Before enabling the SM, it should be placed in a `wait 1 pin` state, so that
; it will not start sampling until the initial line idle state ends.

start_of_0:            ; We are 0.25 bits into a 0 - signal is high
    wait 0 pin 0       ; Wait for the 1->0 transition - at this point we are 0.5 into the bit
    in y, 1 [8]        ; Emit a 0, sleep 3/4 of a bit
    jmp pin start_of_0 ; If signal is 1 again, it's another 0 bit, otherwise it's a 1

; The wrap range covers only the '1' path: when the final `jmp pin` below is
; not taken, execution wraps back to start_of_1 rather than to start_of_0.
.wrap_target
start_of_1:            ; We are 0.25 bits into a 1 - signal is 1
    wait 1 pin 0       ; Wait for the 0->1 transition - at this point we are 0.5 into the bit
    in x, 1 [8]        ; Emit a 1, sleep 3/4 of a bit
    jmp pin start_of_0 ; If signal is 0 again, it's another 1 bit otherwise it's a 0
.wrap
% c-sdk {
// Configure state machine `sm` of `pio` to run the manchester_rx program
// loaded at `offset`, receiving on GPIO `pin` with clock divider `div`,
// then enable it.
static inline void manchester_rx_program_init(PIO pio, uint sm, uint offset, uint pin, float div) {
// Make `pin` an input and hand the pad over to the PIO block.
pio_sm_set_consecutive_pindirs(pio, sm, pin, 1, false);
pio_gpio_init(pio, pin);
pio_sm_config c = manchester_rx_program_get_default_config(offset);
sm_config_set_in_pins(&c, pin); // for WAIT
sm_config_set_jmp_pin(&c, pin); // for JMP
// Shift right, autopush enabled, push threshold of 32 bits.
sm_config_set_in_shift(&c, true, true, 32);
sm_config_set_fifo_join(&c, PIO_FIFO_JOIN_RX);
sm_config_set_clkdiv(&c, div);
pio_sm_init(pio, sm, offset, &c);
// X is set to 1 and Y to 0, so that `in x, 1` / `in y, 1` conveniently
// emit a 1 / 0 bit to the ISR/FIFO.
pio_sm_exec(pio, sm, pio_encode_set(pio_x, 1));
pio_sm_exec(pio, sm, pio_encode_set(pio_y, 0));
// Assume line is idle low, and first transmitted bit is 0. Put SM in a
// wait state before enabling. RX will begin once the first 0 symbol is
// detected.
pio_sm_exec(pio, sm, pio_encode_wait_pin(1, 0) | pio_encode_delay(2));
pio_sm_set_enabled(pio, sm, true);

@ -0,0 +1,112 @@
// -------------------------------------------------- //
// This file is autogenerated by pioasm; do not edit! //
// -------------------------------------------------- //
#pragma once
#include "hardware/pio.h"
// ------------- //
// manchester_tx //
// ------------- //
#define manchester_tx_wrap_target 0
#define manchester_tx_wrap 5
#define manchester_tx_offset_start 4u
static const uint16_t manchester_tx_program_instructions[] = {
// .wrap_target
0xb542, // 0: nop side 0 [5]
0x1b04, // 1: jmp 4 side 1 [3]
0xbd42, // 2: nop side 1 [5]
0xb342, // 3: nop side 0 [3]
0x6021, // 4: out x, 1
0x0022, // 5: jmp !x, 2
// .wrap
static const struct pio_program manchester_tx_program = {
.instructions = manchester_tx_program_instructions,
.length = 6,
.origin = -1,
static inline pio_sm_config manchester_tx_program_get_default_config(uint offset) {
pio_sm_config c = pio_get_default_sm_config();
sm_config_set_wrap(&c, offset + manchester_tx_wrap_target, offset + manchester_tx_wrap);
sm_config_set_sideset(&c, 2, true, false);
return c;
static inline void manchester_tx_program_init(PIO pio, uint sm, uint offset, uint pin, float div) {
pio_sm_set_pins_with_mask(pio, sm, 0, 1u << pin);
pio_sm_set_consecutive_pindirs(pio, sm, pin, 1, true);
pio_gpio_init(pio, pin);
pio_sm_config c = manchester_tx_program_get_default_config(offset);
sm_config_set_sideset_pins(&c, pin);
sm_config_set_out_shift(&c, true, true, 32);
sm_config_set_fifo_join(&c, PIO_FIFO_JOIN_TX);
sm_config_set_clkdiv(&c, div);
pio_sm_init(pio, sm, offset + manchester_tx_offset_start, &c);
pio_sm_set_enabled(pio, sm, true);
// ------------- //
// manchester_rx //
// ------------- //
#define manchester_rx_wrap_target 3
#define manchester_rx_wrap 5
static const uint16_t manchester_rx_program_instructions[] = {
0x2020, // 0: wait 0 pin, 0
0x4841, // 1: in y, 1 [8]
0x00c0, // 2: jmp pin, 0
// .wrap_target
0x20a0, // 3: wait 1 pin, 0
0x4821, // 4: in x, 1 [8]
0x00c0, // 5: jmp pin, 0
// .wrap
static const struct pio_program manchester_rx_program = {
.instructions = manchester_rx_program_instructions,
.length = 6,
.origin = -1,
static inline pio_sm_config manchester_rx_program_get_default_config(uint offset) {
pio_sm_config c = pio_get_default_sm_config();
sm_config_set_wrap(&c, offset + manchester_rx_wrap_target, offset + manchester_rx_wrap);
return c;
// Configure state machine `sm` of `pio` to run the manchester_rx program
// loaded at `offset`, receiving on GPIO `pin` with clock divider `div`,
// then enable it.
static inline void manchester_rx_program_init(PIO pio, uint sm, uint offset, uint pin, float div) {
// Make `pin` an input and hand the pad over to the PIO block.
pio_sm_set_consecutive_pindirs(pio, sm, pin, 1, false);
pio_gpio_init(pio, pin);
pio_sm_config c = manchester_rx_program_get_default_config(offset);
sm_config_set_in_pins(&c, pin); // for WAIT
sm_config_set_jmp_pin(&c, pin); // for JMP
// Shift right, autopush enabled, push threshold of 32 bits.
sm_config_set_in_shift(&c, true, true, 32);
sm_config_set_fifo_join(&c, PIO_FIFO_JOIN_RX);
sm_config_set_clkdiv(&c, div);
pio_sm_init(pio, sm, offset, &c);
// X is set to 1 and Y to 0, so that the program's `in x, 1` / `in y, 1`
// instructions conveniently emit a 1 / 0 bit to the ISR/FIFO.
pio_sm_exec(pio, sm, pio_encode_set(pio_x, 1));
pio_sm_exec(pio, sm, pio_encode_set(pio_y, 0));
// Assume line is idle low, and first transmitted bit is 0. Put SM in a
// wait state before enabling. RX will begin once the first 0 symbol is
// detected.
pio_sm_exec(pio, sm, pio_encode_wait_pin(1, 0) | pio_encode_delay(2));
pio_sm_set_enabled(pio, sm, true);

@ -0,0 +1,61 @@
; Copyright (c) 2021 mjcross
; SPDX-License-Identifier: BSD-3-Clause
.program nec_carrier_burst

; Generate bursts of carrier.
;
; Repeatedly wait for an IRQ to be set then clear it and generate 21 cycles of
; carrier with 25% duty cycle

.define NUM_CYCLES 21               ; how many carrier cycles to generate
.define BURST_IRQ 7                 ; which IRQ should trigger a carrier burst
.define public TICKS_PER_LOOP 4     ; the number of instructions in the loop (for timing)

    set X, (NUM_CYCLES - 1)         ; initialise the loop counter
    wait 1 irq BURST_IRQ            ; wait for the IRQ then clear it
cycle_loop:
    set pins, 1                     ; set the pin high (1 cycle)
    set pins, 0 [1]                 ; set the pin low (2 cycles)
    jmp X--, cycle_loop             ; (1 more cycle)
% c-sdk {
static inline void nec_carrier_burst_program_init(PIO pio, uint sm, uint offset, uint pin, float freq) {
// Create a new state machine configuration
pio_sm_config c = nec_carrier_burst_program_get_default_config (offset);
// Map the SET pin group to one pin, namely the `pin`
// parameter to this function.
sm_config_set_set_pins (&c, pin, 1);
// Set the GPIO function of the pin (connect the PIO to the pad)
pio_gpio_init (pio, pin);
// Set the pin direction to output at the PIO
pio_sm_set_consecutive_pindirs (pio, sm, pin, 1, true);
// Set the clock divider to generate the required frequency
float div = clock_get_hz (clk_sys) / (freq * nec_carrier_burst_TICKS_PER_LOOP);
sm_config_set_clkdiv (&c, div);
// Apply the configuration to the state machine
pio_sm_init (pio, sm, offset, &c);
// Set the state machine running
pio_sm_set_enabled (pio, sm, true);

@ -0,0 +1,70 @@
// -------------------------------------------------- //
// This file is autogenerated by pioasm; do not edit! //
// -------------------------------------------------- //
#pragma once
#include "hardware/pio.h"
// ----------------- //
// nec_carrier_burst //
// ----------------- //
#define nec_carrier_burst_wrap_target 0
#define nec_carrier_burst_wrap 4
#define nec_carrier_burst_TICKS_PER_LOOP 4
static const uint16_t nec_carrier_burst_program_instructions[] = {
// .wrap_target
0xe034, // 0: set x, 20
0x20c7, // 1: wait 1 irq, 7
0xe001, // 2: set pins, 1
0xe100, // 3: set pins, 0 [1]
0x0042, // 4: jmp x--, 2
// .wrap
static const struct pio_program nec_carrier_burst_program = {
.instructions = nec_carrier_burst_program_instructions,
.length = 5,
.origin = -1,
static inline pio_sm_config nec_carrier_burst_program_get_default_config(uint offset) {
pio_sm_config c = pio_get_default_sm_config();
sm_config_set_wrap(&c, offset + nec_carrier_burst_wrap_target, offset + nec_carrier_burst_wrap);
return c;
static inline void nec_carrier_burst_program_init(PIO pio, uint sm, uint offset, uint pin, float freq) {
// Create a new state machine configuration
pio_sm_config c = nec_carrier_burst_program_get_default_config (offset);
// Map the SET pin group to one pin, namely the `pin`
// parameter to this function.
sm_config_set_set_pins (&c, pin, 1);
// Set the GPIO function of the pin (connect the PIO to the pad)
pio_gpio_init (pio, pin);
// Set the pin direction to output at the PIO
pio_sm_set_consecutive_pindirs (pio, sm, pin, 1, true);
// Set the clock divider to generate the required frequency
float div = clock_get_hz (clk_sys) / (freq * nec_carrier_burst_TICKS_PER_LOOP);
sm_config_set_clkdiv (&c, div);
// Apply the configuration to the state machine
pio_sm_init (pio, sm, offset, &c);
// Set the state machine running
pio_sm_set_enabled (pio, sm, true);

@ -0,0 +1,79 @@
; Copyright (c) 2021 mjcross
; SPDX-License-Identifier: BSD-3-Clause
.program nec_carrier_control

; Transmit an encoded 32-bit frame in NEC IR format.
;
; Accepts 32-bit words from the transmit FIFO and sends them least-significant bit first
; using pulse position modulation.
;
; Carrier bursts are generated using the nec_carrier_burst program, which is expected to be
; running on a separate state machine.
;
; This program expects there to be 2 state machine ticks per 'normal' 562.5us
; burst period.

.define BURST_IRQ 7                     ; the IRQ used to trigger a carrier burst
.define NUM_INITIAL_BURSTS 16           ; how many bursts to transmit for a 'sync burst'

.wrap_target
    pull                                ; fetch a data word from the transmit FIFO into the
                                        ; output shift register, blocking if the FIFO is empty

    set X, (NUM_INITIAL_BURSTS - 1)     ; send a sync burst (9ms)
long_burst:
    irq BURST_IRQ                       ; trigger one carrier burst per loop iteration
    jmp X-- long_burst

    nop [15]                            ; send a 4.5ms space
    irq BURST_IRQ [1]                   ; send a 562.5us burst to begin the first data bit

data_bit:
    out X, 1                            ; shift the least-significant bit from the OSR
    jmp !X burst                        ; send a short delay for a '0' bit
    nop [3]                             ; send an additional delay for a '1' bit
burst:
    irq BURST_IRQ                       ; send a 562.5us burst to end the data bit

    jmp !OSRE data_bit                  ; continue sending bits until the OSR is empty
.wrap                                   ; fetch another data word from the FIFO
% c-sdk {
// Configure state machine `sm` of `pio` to run the nec_carrier_control
// program loaded at `offset`, then enable it.
//
//   tick_rate      - state machine clock rate; the system clock is divided
//                    down to this frequency
//   bits_per_frame - pull threshold for the output shift register, i.e. how
//                    many bits are shifted out before the OSR counts as empty
static inline void nec_carrier_control_program_init (PIO pio, uint sm, uint offset, float tick_rate, int bits_per_frame) {
    // create a new state machine configuration
    pio_sm_config c = nec_carrier_control_program_get_default_config(offset);

    // configure the output shift register
    sm_config_set_out_shift (&c,
                             true,            // shift right
                             false,           // disable autopull
                             bits_per_frame); // pull threshold

    // join the FIFOs to make a single large transmit FIFO
    sm_config_set_fifo_join (&c, PIO_FIFO_JOIN_TX);

    // configure the clock divider
    float div = clock_get_hz (clk_sys) / tick_rate;
    sm_config_set_clkdiv (&c, div);

    // apply the configuration to the state machine
    pio_sm_init(pio, sm, offset, &c);

    // set the state machine running
    pio_sm_set_enabled(pio, sm, true);
}

@ -0,0 +1,73 @@
// -------------------------------------------------- //
// This file is autogenerated by pioasm; do not edit! //
// -------------------------------------------------- //
#pragma once
#include "hardware/pio.h"
// ------------------- //
// nec_carrier_control //
// ------------------- //
#define nec_carrier_control_wrap_target 0
#define nec_carrier_control_wrap 10
static const uint16_t nec_carrier_control_program_instructions[] = {
// .wrap_target
0x80a0, // 0: pull block
0xe02f, // 1: set x, 15
0xc007, // 2: irq nowait 7
0x0042, // 3: jmp x--, 2
0xaf42, // 4: nop [15]
0xc107, // 5: irq nowait 7 [1]
0x6021, // 6: out x, 1
0x0029, // 7: jmp !x, 9
0xa342, // 8: nop [3]
0xc007, // 9: irq nowait 7
0x00e6, // 10: jmp !osre, 6
// .wrap
static const struct pio_program nec_carrier_control_program = {
.instructions = nec_carrier_control_program_instructions,
.length = 11,
.origin = -1,
static inline pio_sm_config nec_carrier_control_program_get_default_config(uint offset) {
pio_sm_config c = pio_get_default_sm_config();
sm_config_set_wrap(&c, offset + nec_carrier_control_wrap_target, offset + nec_carrier_control_wrap);
return c;
// Configure state machine `sm` of `pio` to run the nec_carrier_control
// program loaded at `offset`, then enable it.
//
//   tick_rate      - state machine clock rate; the system clock is divided
//                    down to this frequency
//   bits_per_frame - pull threshold for the output shift register, i.e. how
//                    many bits are shifted out before the OSR counts as empty
//                    (the program loops on `jmp !osre`)
static inline void nec_carrier_control_program_init (PIO pio, uint sm, uint offset, float tick_rate, int bits_per_frame) {
    // create a new state machine configuration
    pio_sm_config c = nec_carrier_control_program_get_default_config(offset);

    // configure the output shift register
    sm_config_set_out_shift (&c,
                             true,            // shift right
                             false,           // disable autopull
                             bits_per_frame); // pull threshold

    // join the FIFOs to make a single large transmit FIFO
    sm_config_set_fifo_join (&c, PIO_FIFO_JOIN_TX);

    // configure the clock divider
    float div = clock_get_hz (clk_sys) / tick_rate;
    sm_config_set_clkdiv (&c, div);

    // apply the configuration to the state machine
    pio_sm_init(pio, sm, offset, &c);

    // set the state machine running
    pio_sm_set_enabled(pio, sm, true);
}

@ -0,0 +1,96 @@
; Copyright (c) 2021 mjcross
; SPDX-License-Identifier: BSD-3-Clause
.program nec_receive

; Decode IR frames in NEC format and push 32-bit words to the input FIFO.
;
; The input pin should be connected to an IR detector with an 'active low' output.
;
; This program expects there to be 10 state machine clock ticks per 'normal' 562.5us burst period
; in order to permit timely detection of start of a burst. The initialisation function below sets
; the correct divisor to achieve this relative to the system clock.
;
; Within the 'NEC' protocol frames consist of 32 bits sent least-significant bit first; so the
; Input Shift Register should be configured to shift right and autopush after 32 bits, as in the
; initialisation function below.

.define BURST_LOOP_COUNTER 30                   ; the detection threshold for a 'frame sync' burst
.define BIT_SAMPLE_DELAY 15                     ; how long to wait after the end of the burst before sampling

next_burst:
    set X, BURST_LOOP_COUNTER                   ; reload the burst-length counter
    wait 0 pin 0                                ; wait for the next burst to start

burst_loop:
    jmp pin data_bit                            ; the burst ended before the counter expired
    jmp X-- burst_loop                          ; wait for the burst to end

                                                ; the counter expired - this is a sync burst
    mov ISR, NULL                               ; reset the Input Shift Register
    wait 1 pin 0                                ; wait for the sync burst to finish
    jmp next_burst                              ; wait for the first data bit

data_bit:
    nop [ BIT_SAMPLE_DELAY - 1 ]                ; wait for 1.5 burst periods before sampling the bit value
    in PINS, 1                                  ; if the next burst has started then detect a '0' (short gap)
                                                ; otherwise detect a '1' (long gap)
                                                ; after 32 bits the ISR will autopush to the receive FIFO
% c-sdk {
static inline void nec_receive_program_init (PIO pio, uint sm, uint offset, uint pin) {
// Set the GPIO function of the pin (connect the PIO to the pad)
pio_gpio_init(pio, pin);
// Set the pin direction to `input` at the PIO
pio_sm_set_consecutive_pindirs(pio, sm, pin, 1, false);
// Create a new state machine configuration
pio_sm_config c = nec_receive_program_get_default_config (offset);
// configure the Input Shift Register
sm_config_set_in_shift (&c,
true, // shift right
true, // enable autopush
32); // autopush after 32 bits
// join the FIFOs to make a single large receive FIFO
sm_config_set_fifo_join (&c, PIO_FIFO_JOIN_RX);
// Map the IN pin group to one pin, namely the `pin`
// parameter to this function.
sm_config_set_in_pins (&c, pin);
// Map the JMP pin to the `pin` parameter of this function.
sm_config_set_jmp_pin (&c, pin);
// Set the clock divider to 10 ticks per 562.5us burst period
float div = clock_get_hz (clk_sys) / (10.0 / 562.5e-6);
sm_config_set_clkdiv (&c, div);
// Apply the configuration to the state machine
pio_sm_init (pio, sm, offset, &c);
// Set the state machine running
pio_sm_set_enabled (pio, sm, true);

@ -0,0 +1,84 @@
// -------------------------------------------------- //
// This file is autogenerated by pioasm; do not edit! //
// -------------------------------------------------- //
#pragma once
#include "hardware/pio.h"
// ----------- //
// nec_receive //
// ----------- //
#define nec_receive_wrap_target 0
#define nec_receive_wrap 8
static const uint16_t nec_receive_program_instructions[] = {
// .wrap_target
0xe03e, // 0: set x, 30
0x2020, // 1: wait 0 pin, 0
0x00c7, // 2: jmp pin, 7
0x0042, // 3: jmp x--, 2
0xa0c3, // 4: mov isr, null
0x20a0, // 5: wait 1 pin, 0
0x0000, // 6: jmp 0
0xae42, // 7: nop [14]
0x4001, // 8: in pins, 1
// .wrap
static const struct pio_program nec_receive_program = {
.instructions = nec_receive_program_instructions,
.length = 9,
.origin = -1,
static inline pio_sm_config nec_receive_program_get_default_config(uint offset) {
pio_sm_config c = pio_get_default_sm_config();
sm_config_set_wrap(&c, offset + nec_receive_wrap_target, offset + nec_receive_wrap);
return c;
static inline void nec_receive_program_init (PIO pio, uint sm, uint offset, uint pin) {
// Set the GPIO function of the pin (connect the PIO to the pad)
pio_gpio_init(pio, pin);
// Set the pin direction to `input` at the PIO
pio_sm_set_consecutive_pindirs(pio, sm, pin, 1, false);
// Create a new state machine configuration
pio_sm_config c = nec_receive_program_get_default_config (offset);
// configure the Input Shift Register
sm_config_set_in_shift (&c,
true, // shift right
true, // enable autopush
32); // autopush after 32 bits
// join the FIFOs to make a single large receive FIFO
sm_config_set_fifo_join (&c, PIO_FIFO_JOIN_RX);
// Map the IN pin group to one pin, namely the `pin`
// parameter to this function.
sm_config_set_in_pins (&c, pin);
// Map the JMP pin to the `pin` parameter of this function.
sm_config_set_jmp_pin (&c, pin);
// Set the clock divider to 10 ticks per 562.5us burst period
float div = clock_get_hz (clk_sys) / (10.0 / 562.5e-6);
sm_config_set_clkdiv (&c, div);
// Apply the configuration to the state machine
pio_sm_init (pio, sm, offset, &c);
// Set the state machine running
pio_sm_set_enabled (pio, sm, true);

@ -0,0 +1,27 @@
; Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
; SPDX-License-Identifier: BSD-3-Clause
.program pio_serialiser
; Just serialise a stream of bits. Take 32 bits from each FIFO record. LSB-first.
out pins, 1
% c-sdk {
static inline void pio_serialiser_program_init(PIO pio, uint sm, uint offset, uint data_pin, float clk_div) {
pio_gpio_init(pio, data_pin);
pio_sm_set_consecutive_pindirs(pio, sm, data_pin, 1, true);
pio_sm_config c = pio_serialiser_program_get_default_config(offset);
sm_config_set_out_pins(&c, data_pin, 1);
sm_config_set_fifo_join(&c, PIO_FIFO_JOIN_TX);
sm_config_set_clkdiv(&c, clk_div);
sm_config_set_out_shift(&c, true, true, 32);
pio_sm_init(pio, sm, offset, &c);
pio_sm_set_enabled(pio, sm, true);

@ -0,0 +1,50 @@
// -------------------------------------------------- //
// This file is autogenerated by pioasm; do not edit! //
// -------------------------------------------------- //
#pragma once
#include "hardware/pio.h"
// -------------- //
// pio_serialiser //
// -------------- //
#define pio_serialiser_wrap_target 0
#define pio_serialiser_wrap 0
static const uint16_t pio_serialiser_program_instructions[] = {
// .wrap_target
0x6001, // 0: out pins, 1
// .wrap
static const struct pio_program pio_serialiser_program = {
.instructions = pio_serialiser_program_instructions,
.length = 1,
.origin = -1,
static inline pio_sm_config pio_serialiser_program_get_default_config(uint offset) {
pio_sm_config c = pio_get_default_sm_config();
sm_config_set_wrap(&c, offset + pio_serialiser_wrap_target, offset + pio_serialiser_wrap);
return c;
static inline void pio_serialiser_program_init(PIO pio, uint sm, uint offset, uint data_pin, float clk_div) {
pio_gpio_init(pio, data_pin);
pio_sm_set_consecutive_pindirs(pio, sm, data_pin, 1, true);
pio_sm_config c = pio_serialiser_program_get_default_config(offset);
sm_config_set_out_pins(&c, data_pin, 1);
sm_config_set_fifo_join(&c, PIO_FIFO_JOIN_TX);
sm_config_set_clkdiv(&c, clk_div);
sm_config_set_out_shift(&c, true, true, 32);
pio_sm_init(pio, sm, offset, &c);
pio_sm_set_enabled(pio, sm, true);

@ -0,0 +1,31 @@
; Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
; SPDX-License-Identifier: BSD-3-Clause
; Side-set pin 0 is used for PWM output
.program pwm
.side_set 1 opt
pull noblock side 0 ; Pull from FIFO to OSR if available, else copy X to OSR.
mov x, osr ; Copy most-recently-pulled value back to scratch X
mov y, isr ; ISR contains PWM period. Y used as counter.
; Top of the counting loop; `jmp y--` at the bottom branches back here.
countloop:
jmp x!=y noset ; Set pin high if X == Y, keep the two paths length matched
jmp skip side 1
; Taken when X != Y: spend one cycle without touching the side-set pin.
noset:
nop ; Single dummy cycle to keep the two paths the same length
; Both paths rejoin here.
skip:
jmp y-- countloop ; Loop until Y hits 0, then pull a fresh PWM value from FIFO
% c-sdk {
static inline void pwm_program_init(PIO pio, uint sm, uint offset, uint pin) {
pio_gpio_init(pio, pin);
pio_sm_set_consecutive_pindirs(pio, sm, pin, 1, true);
pio_sm_config c = pwm_program_get_default_config(offset);
sm_config_set_sideset_pins(&c, pin);
pio_sm_init(pio, sm, offset, &c);

@ -0,0 +1,53 @@
// -------------------------------------------------- //
// This file is autogenerated by pioasm; do not edit! //
// -------------------------------------------------- //
#pragma once
#include "hardware/pio.h"
// --- //
// pwm //
// --- //
#define pwm_wrap_target 0
#define pwm_wrap 6
static const uint16_t pwm_program_instructions[] = {
// .wrap_target
0x9080, // 0: pull noblock side 0
0xa027, // 1: mov x, osr
0xa046, // 2: mov y, isr
0x00a5, // 3: jmp x != y, 5
0x1806, // 4: jmp 6 side 1
0xa042, // 5: nop
0x0083, // 6: jmp y--, 3
// .wrap
static const struct pio_program pwm_program = {
.instructions = pwm_program_instructions,
.length = 7,
.origin = -1,
static inline pio_sm_config pwm_program_get_default_config(uint offset) {
pio_sm_config c = pio_get_default_sm_config();
sm_config_set_wrap(&c, offset + pwm_wrap_target, offset + pwm_wrap);
sm_config_set_sideset(&c, 2, true, false);
return c;
static inline void pwm_program_init(PIO pio, uint sm, uint offset, uint pin) {
pio_gpio_init(pio, pin);
pio_sm_set_consecutive_pindirs(pio, sm, pin, 1, true);
pio_sm_config c = pwm_program_get_default_config(offset);
sm_config_set_sideset_pins(&c, pin);
pio_sm_init(pio, sm, offset, &c);

@ -0,0 +1,165 @@
; Copyright (c) 2021 pmarques-dev @ github
; SPDX-License-Identifier: BSD-3-Clause
.program quadrature_encoder
; this code must be loaded into address 0, but at 29 instructions, it probably
; wouldn't be able to share space with other programs anyway
.origin 0
; the code works by running a loop that continuously shifts the 2 phase pins into
; ISR and looks at the lower 4 bits to do a computed jump to an instruction that
; does the proper "do nothing" | "increment" | "decrement" action for that pin
; state change (or no change)
; ISR holds the last state of the 2 pins during most of the code. The Y register
; keeps the current encoder count and is incremented / decremented according to
; the steps sampled
; writing any non zero value to the TX FIFO makes the state machine push the
; current count to RX FIFO between 6 and 18 clocks afterwards. The worst case
; sampling loop takes 14 cycles, so this program is able to read step rates up
; to sysclk / 14 (e.g., sysclk 125MHz, max step rate = 8.9 Msteps/sec)
; 00 state
JMP update ; read 00
JMP decrement ; read 01
JMP increment ; read 10
JMP update ; read 11
; 01 state
JMP increment ; read 00
JMP update ; read 01
JMP update ; read 10
JMP decrement ; read 11
; 10 state
JMP decrement ; read 00
JMP update ; read 01
JMP update ; read 10
JMP increment ; read 11
; to reduce code size, the last 2 states are implemented in place and become the
; target for the other jumps
; 11 state
JMP update ; read 00
JMP increment ; read 01
; note: the target of this instruction must be the next address, so that
; the effect of the instruction does not depend on the value of Y. The
; same is true for the "JMP X--" below. Basically "JMP Y--, <next addr>"
; is just a pure "decrement Y" instruction, with no other side effects
JMP Y--, update ; read 10
; this is where the main loop starts
; we start by checking the TX FIFO to see if the main code is asking for
; the current count. After the PULL noblock, OSR will have either 0 if
; there was nothing or the value that was there
SET X, 0
PULL noblock
; since there are not many free registers, and PULL is done into OSR, we
; have to do some juggling to avoid losing the state information and
; still place the values where we need them
; the main code did not ask for the count, so just go to "sample_pins"
JMP !X, sample_pins
; if it did ask for the count, then we push it
MOV ISR, Y ; we trash ISR, but we already have a copy in OSR
; we shift into ISR the last state of the 2 input pins (now in OSR) and
; the new state of the 2 pins, thus producing the 4 bit target for the
; computed jump into the correct action for this state
; the PIO does not have an increment instruction, so to do that we do a
; negate, decrement, negate sequence
JMP X--, increment_cont
.wrap ; the .wrap here avoids one jump instruction and saves a cycle too
% c-sdk {
#include "hardware/clocks.h"
#include "hardware/gpio.h"
// max_step_rate is used to lower the clock of the state machine to save power
// if the application doesn't require a very high sampling rate. Passing zero
// will set the clock to the maximum, which gives a max step rate of around
// 8.9 Msteps/sec at 125MHz
static inline void quadrature_encoder_program_init(PIO pio, uint sm, uint offset, uint pin, int max_step_rate)
pio_sm_set_consecutive_pindirs(pio, sm, pin, 2, false);
gpio_pull_up(pin + 1);
pio_sm_config c = quadrature_encoder_program_get_default_config(offset);
sm_config_set_in_pins(&c, pin); // for WAIT, IN
sm_config_set_jmp_pin(&c, pin); // for JMP
// shift to left, autopull disabled
sm_config_set_in_shift(&c, false, false, 32);
// don't join FIFO's
sm_config_set_fifo_join(&c, PIO_FIFO_JOIN_NONE);
// passing "0" as the sample frequency,
if (max_step_rate == 0) {
sm_config_set_clkdiv(&c, 1.0);
} else {
// one state machine loop takes at most 14 cycles
float div = (float)clock_get_hz(clk_sys) / (14 * max_step_rate);
sm_config_set_clkdiv(&c, div);
pio_sm_init(pio, sm, offset, &c);
pio_sm_set_enabled(pio, sm, true);
// When requesting the current count we may have to wait a few cycles (average
// ~11 sysclk cycles) for the state machine to reply. If we are reading multiple
// encoders, we may request them all in one go and then fetch them all, thus
// avoiding doing the wait multiple times. If we are reading just one encoder,
// we can use the "get_count" function to request and wait
static inline void quadrature_encoder_request_count(PIO pio, uint sm)
pio->txf[sm] = 1;
static inline int32_t quadrature_encoder_fetch_count(PIO pio, uint sm)
while (pio_sm_is_rx_fifo_empty(pio, sm))
return pio->rxf[sm];
static inline int32_t quadrature_encoder_get_count(PIO pio, uint sm)
quadrature_encoder_request_count(pio, sm);
return quadrature_encoder_fetch_count(pio, sm);

@ -0,0 +1,116 @@
// -------------------------------------------------- //
// This file is autogenerated by pioasm; do not edit! //
// -------------------------------------------------- //
#pragma once
#include "hardware/pio.h"
// ------------------ //
// quadrature_encoder //
// ------------------ //
#define quadrature_encoder_wrap_target 15
#define quadrature_encoder_wrap 28
static const uint16_t quadrature_encoder_program_instructions[] = {
0x000f, // 0: jmp 15
0x000e, // 1: jmp 14
0x001a, // 2: jmp 26
0x000f, // 3: jmp 15
0x001a, // 4: jmp 26
0x000f, // 5: jmp 15
0x000f, // 6: jmp 15
0x000e, // 7: jmp 14
0x000e, // 8: jmp 14
0x000f, // 9: jmp 15
0x000f, // 10: jmp 15
0x001a, // 11: jmp 26
0x000f, // 12: jmp 15
0x001a, // 13: jmp 26
0x008f, // 14: jmp y--, 15
// .wrap_target
0xe020, // 15: set x, 0
0x8080, // 16: pull noblock
0xa027, // 17: mov x, osr
0xa0e6, // 18: mov osr, isr
0x0036, // 19: jmp !x, 22
0xa0c2, // 20: mov isr, y
0x8020, // 21: push block
0xa0c3, // 22: mov isr, null
0x40e2, // 23: in osr, 2
0x4002, // 24: in pins, 2
0xa0a6, // 25: mov pc, isr
0xa02a, // 26: mov x, !y
0x005c, // 27: jmp x--, 28
0xa049, // 28: mov y, !x
// .wrap
static const struct pio_program quadrature_encoder_program = {
.instructions = quadrature_encoder_program_instructions,
.length = 29,
.origin = 0,
static inline pio_sm_config quadrature_encoder_program_get_default_config(uint offset) {
pio_sm_config c = pio_get_default_sm_config();
sm_config_set_wrap(&c, offset + quadrature_encoder_wrap_target, offset + quadrature_encoder_wrap);
return c;
#include "hardware/clocks.h"
#include "hardware/gpio.h"
// max_step_rate is used to lower the clock of the state machine to save power
// if the application doesn't require a very high sampling rate. Passing zero
// will set the clock to the maximum, which gives a max step rate of around
// 8.9 Msteps/sec at 125MHz
static inline void quadrature_encoder_program_init(PIO pio, uint sm, uint offset, uint pin, int max_step_rate)
pio_sm_set_consecutive_pindirs(pio, sm, pin, 2, false);
gpio_pull_up(pin + 1);
pio_sm_config c = quadrature_encoder_program_get_default_config(offset);
sm_config_set_in_pins(&c, pin); // for WAIT, IN
sm_config_set_jmp_pin(&c, pin); // for JMP
// shift to left, autopull disabled
sm_config_set_in_shift(&c, false, false, 32);
// don't join FIFO's
sm_config_set_fifo_join(&c, PIO_FIFO_JOIN_NONE);
// passing "0" as the sample frequency,
if (max_step_rate == 0) {
sm_config_set_clkdiv(&c, 1.0);
} else {
// one state machine loop takes at most 14 cycles
float div = (float)clock_get_hz(clk_sys) / (14 * max_step_rate);
sm_config_set_clkdiv(&c, div);
pio_sm_init(pio, sm, offset, &c);
pio_sm_set_enabled(pio, sm, true);
// When requesting the current count we may have to wait a few cycles (average
// ~11 sysclk cycles) for the state machine to reply. If we are reading multiple
// encoders, we may request them all in one go and then fetch them all, thus
// avoiding doing the wait multiple times. If we are reading just one encoder,
// we can use the "get_count" function to request and wait
static inline void quadrature_encoder_request_count(PIO pio, uint sm)
pio->txf[sm] = 1;
static inline int32_t quadrature_encoder_fetch_count(PIO pio, uint sm)
while (pio_sm_is_rx_fifo_empty(pio, sm))
return pio->rxf[sm];
static inline int32_t quadrature_encoder_get_count(PIO pio, uint sm)
quadrature_encoder_request_count(pio, sm);
return quadrature_encoder_fetch_count(pio, sm);

@ -0,0 +1,38 @@
; Copyright (c) 2021 Raspberry Pi (Trading) Ltd.
; SPDX-License-Identifier: BSD-3-Clause
.program resistor_dac_5bit
; Drive one of the 5-bit resistor DACs on the VGA reference board. (this isn't
; a good way to do VGA -- just want a nice sawtooth for the ADC example!)
out pins, 5
% c-sdk {
#include "hardware/clocks.h"
static inline void resistor_dac_5bit_program_init(PIO pio, uint sm, uint offset,
uint sample_rate_hz, uint pin_base) {
pio_sm_set_pins_with_mask(pio, sm, 0, 0x1fu << pin_base);
pio_sm_set_pindirs_with_mask(pio, sm, ~0u, 0x1fu << pin_base);
for (int i = 0; i < 5; ++i)
pio_gpio_init(pio, pin_base + i);
pio_sm_config c = resistor_dac_5bit_program_get_default_config(offset);
sm_config_set_out_pins(&c, pin_base, 5);
// Shift to right, autopull threshold 5
sm_config_set_out_shift(&c, true, true, 5);
// Deeper FIFO as we're not doing any RX
sm_config_set_fifo_join(&c, PIO_FIFO_JOIN_TX);
float div = (float)clock_get_hz(clk_sys) / sample_rate_hz;
sm_config_set_clkdiv(&c, div);
pio_sm_init(pio, sm, offset, &c);
pio_sm_set_enabled(pio, sm, true);

@ -0,0 +1,57 @@
// -------------------------------------------------- //
// This file is autogenerated by pioasm; do not edit! //
// -------------------------------------------------- //
#pragma once
#include "hardware/pio.h"
// ----------------- //
// resistor_dac_5bit //
// ----------------- //
#define resistor_dac_5bit_wrap_target 0
#define resistor_dac_5bit_wrap 0
static const uint16_t resistor_dac_5bit_program_instructions[] = {
// .wrap_target
0x6005, // 0: out pins, 5
// .wrap
static const struct pio_program resistor_dac_5bit_program = {
.instructions = resistor_dac_5bit_program_instructions,
.length = 1,
.origin = -1,
static inline pio_sm_config resistor_dac_5bit_program_get_default_config(uint offset) {
pio_sm_config c = pio_get_default_sm_config();
sm_config_set_wrap(&c, offset + resistor_dac_5bit_wrap_target, offset + resistor_dac_5bit_wrap);
return c;
#include "hardware/clocks.h"
static inline void resistor_dac_5bit_program_init(PIO pio, uint sm, uint offset,
uint sample_rate_hz, uint pin_base) {
pio_sm_set_pins_with_mask(pio, sm, 0, 0x1fu << pin_base);
pio_sm_set_pindirs_with_mask(pio, sm, ~0u, 0x1fu << pin_base);
for (int i = 0; i < 5; ++i)
pio_gpio_init(pio, pin_base + i);
pio_sm_config c = resistor_dac_5bit_program_get_default_config(offset);
sm_config_set_out_pins(&c, pin_base, 5);
// Shift to right, autopull threshold 5
sm_config_set_out_shift(&c, true, true, 5);
// Deeper FIFO as we're not doing any RX
sm_config_set_fifo_join(&c, PIO_FIFO_JOIN_TX);
float div = (float)clock_get_hz(clk_sys) / sample_rate_hz;
sm_config_set_clkdiv(&c, div);
pio_sm_init(pio, sm, offset, &c);
pio_sm_set_enabled(pio, sm, true);

@ -0,0 +1,168 @@
; Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
; SPDX-License-Identifier: BSD-3-Clause
; These programs implement full-duplex SPI, with a SCK period of 4 clock
; cycles. A different program is provided for each value of CPHA, and CPOL is
; achieved using the hardware GPIO inversion available in the IO controls.
; Transmit-only SPI can go twice as fast -- see the ST7789 example!
.program spi_cpha0
.side_set 1
; Pin assignments:
; - SCK is side-set pin 0
; - MOSI is OUT pin 0
; - MISO is IN pin 0
; Autopush and autopull must be enabled, and the serial frame size is set by
; configuring the push/pull threshold. Shift left/right is fine, but you must
; justify the data yourself. This is done most conveniently for frame sizes of
; 8 or 16 bits by using the narrow store replication and narrow load byte
; picking behaviour of RP2040's IO fabric.
; Clock phase = 0: data is captured on the leading edge of each SCK pulse, and
; transitions on the trailing edge, or some time before the first leading edge.
out pins, 1 side 0 [1] ; Stall here on empty (sideset proceeds even if
in pins, 1 side 1 [1] ; instruction stalls, so we stall with SCK low)
.program spi_cpha1
.side_set 1
; Clock phase = 1: data transitions on the leading edge of each SCK pulse, and
; is captured on the trailing edge.
out x, 1 side 0 ; Stall here on empty (keep SCK deasserted)
mov pins, x side 1 [1] ; Output data, assert SCK (mov pins uses OUT mapping)
in pins, 1 side 0 ; Input data, deassert SCK
% c-sdk {
#include "hardware/gpio.h"
static inline void pio_spi_init(PIO pio, uint sm, uint prog_offs, uint n_bits,
float clkdiv, bool cpha, bool cpol, uint pin_sck, uint pin_mosi, uint pin_miso) {
pio_sm_config c = cpha ? spi_cpha1_program_get_default_config(prog_offs) : spi_cpha0_program_get_default_config(prog_offs);
sm_config_set_out_pins(&c, pin_mosi, 1);
sm_config_set_in_pins(&c, pin_miso);
sm_config_set_sideset_pins(&c, pin_sck);
// Only support MSB-first in this example code (shift to left, auto push/pull, threshold=nbits)
sm_config_set_out_shift(&c, false, true, n_bits);
sm_config_set_in_shift(&c, false, true, n_bits);
sm_config_set_clkdiv(&c, clkdiv);
// MOSI, SCK output are low, MISO is input
pio_sm_set_pins_with_mask(pio, sm, 0, (1u << pin_sck) | (1u << pin_mosi));
pio_sm_set_pindirs_with_mask(pio, sm, (1u << pin_sck) | (1u << pin_mosi), (1u << pin_sck) | (1u << pin_mosi) | (1u << pin_miso));
pio_gpio_init(pio, pin_mosi);
pio_gpio_init(pio, pin_miso);
pio_gpio_init(pio, pin_sck);
// The pin muxes can be configured to invert the output (among other things),
// and this is a cheesy way to get CPOL=1
gpio_set_outover(pin_sck, cpol ? GPIO_OVERRIDE_INVERT : GPIO_OVERRIDE_NORMAL);
// SPI is synchronous, so bypass input synchroniser to reduce input delay.
hw_set_bits(&pio->input_sync_bypass, 1u << pin_miso);
pio_sm_init(pio, sm, prog_offs, &c);
pio_sm_set_enabled(pio, sm, true);
; SPI with Chip Select
; -----------------------------------------------------------------------------
; For your amusement, here are some SPI programs with an automatic chip select
; (asserted once data appears in TX FIFO, deasserts when FIFO bottoms out, has
; a nice front/back porch).
; The number of bits per FIFO entry is configured via the Y register
; and the autopush/pull threshold. From 2 to 32 bits.
; Pin assignments:
; - SCK is side-set bit 0
; - CSn is side-set bit 1
; - MOSI is OUT bit 0 (host-to-device)
; - MISO is IN bit 0 (device-to-host)
; This program only supports one chip select -- use GPIO if more are needed
; Provide a variation for each possibility of CPHA; for CPOL we can just
; invert SCK in the IO muxing controls (downstream from PIO)
; CPHA=0: data is captured on the leading edge of each SCK pulse (including
; the first pulse), and transitions on the trailing edge
.program spi_cpha0_cs
.side_set 2
; Execution wraps from the `.wrap` at the end of the program back to here.
.wrap_target
; Start of the per-bit loop; both `jmp` instructions below branch here.
bitloop:
out pins, 1 side 0x0 [1]
in pins, 1 side 0x1
jmp x-- bitloop side 0x1
out pins, 1 side 0x0
mov x, y side 0x0 ; Reload bit counter from Y
in pins, 1 side 0x1
jmp !osre bitloop side 0x1 ; Fall-through if TXF empties
nop side 0x0 [1] ; CSn back porch
public entry_point: ; Must set X,Y to n-2 before starting!
pull ifempty side 0x2 [1] ; Block with CSn high (minimum 2 cycles)
.wrap ; Note ifempty to avoid time-of-check race
; CPHA=1: data transitions on the leading edge of each SCK pulse, and is
; captured on the trailing edge
.program spi_cpha1_cs
.side_set 2
; Start of the per-bit loop; both `jmp` instructions below branch here.
bitloop:
out pins, 1 side 0x1 [1]
in pins, 1 side 0x0
jmp x-- bitloop side 0x0
out pins, 1 side 0x1
mov x, y side 0x1
in pins, 1 side 0x0
jmp !osre bitloop side 0x0
public entry_point: ; Must set X,Y to n-2 before starting!
pull ifempty side 0x2 [1] ; Block with CSn high (minimum 2 cycles)
nop side 0x0 [1]; CSn front porch
% c-sdk {
#include "hardware/gpio.h"
static inline void pio_spi_cs_init(PIO pio, uint sm, uint prog_offs, uint n_bits, float clkdiv, bool cpha, bool cpol,
uint pin_sck, uint pin_mosi, uint pin_miso) {
pio_sm_config c = cpha ? spi_cpha1_cs_program_get_default_config(prog_offs) : spi_cpha0_cs_program_get_default_config(prog_offs);
sm_config_set_out_pins(&c, pin_mosi, 1);
sm_config_set_in_pins(&c, pin_miso);
sm_config_set_sideset_pins(&c, pin_sck);
sm_config_set_out_shift(&c, false, true, n_bits);
sm_config_set_in_shift(&c, false, true, n_bits);
sm_config_set_clkdiv(&c, clkdiv);
pio_sm_set_pins_with_mask(pio, sm, (2u << pin_sck), (3u << pin_sck) | (1u << pin_mosi));
pio_sm_set_pindirs_with_mask(pio, sm, (3u << pin_sck) | (1u << pin_mosi), (3u << pin_sck) | (1u << pin_mosi) | (1u << pin_miso));
pio_gpio_init(pio, pin_mosi);
pio_gpio_init(pio, pin_miso);
pio_gpio_init(pio, pin_sck);
pio_gpio_init(pio, pin_sck + 1);
gpio_set_outover(pin_sck, cpol ? GPIO_OVERRIDE_INVERT : GPIO_OVERRIDE_NORMAL);
hw_set_bits(&pio->input_sync_bypass, 1u << pin_miso);
uint entry_point = prog_offs + (cpha ? spi_cpha1_cs_offset_entry_point : spi_cpha0_cs_offset_entry_point);
pio_sm_init(pio, sm, entry_point, &c);
pio_sm_exec(pio, sm, pio_encode_set(pio_x, n_bits - 2));
pio_sm_exec(pio, sm, pio_encode_set(pio_y, n_bits - 2));
pio_sm_set_enabled(pio, sm, true);

@ -0,0 +1,198 @@
// -------------------------------------------------- //
// This file is autogenerated by pioasm; do not edit! //
// -------------------------------------------------- //
#pragma once
#include "hardware/pio.h"
// --------- //
// spi_cpha0 //
// --------- //
#define spi_cpha0_wrap_target 0
#define spi_cpha0_wrap 1
static const uint16_t spi_cpha0_program_instructions[] = {
// .wrap_target
0x6101, // 0: out pins, 1 side 0 [1]
0x5101, // 1: in pins, 1 side 1 [1]
// .wrap
static const struct pio_program spi_cpha0_program = {
.instructions = spi_cpha0_program_instructions,
.length = 2,
.origin = -1,
static inline pio_sm_config spi_cpha0_program_get_default_config(uint offset) {
pio_sm_config c = pio_get_default_sm_config();
sm_config_set_wrap(&c, offset + spi_cpha0_wrap_target, offset + spi_cpha0_wrap);
sm_config_set_sideset(&c, 1, false, false);
return c;
// --------- //
// spi_cpha1 //
// --------- //
#define spi_cpha1_wrap_target 0
#define spi_cpha1_wrap 2
static const uint16_t spi_cpha1_program_instructions[] = {
// .wrap_target
0x6021, // 0: out x, 1 side 0
0xb101, // 1: mov pins, x side 1 [1]
0x4001, // 2: in pins, 1 side 0
// .wrap
static const struct pio_program spi_cpha1_program = {
.instructions = spi_cpha1_program_instructions,
.length = 3,
.origin = -1,
static inline pio_sm_config spi_cpha1_program_get_default_config(uint offset) {
pio_sm_config c = pio_get_default_sm_config();
sm_config_set_wrap(&c, offset + spi_cpha1_wrap_target, offset + spi_cpha1_wrap);
sm_config_set_sideset(&c, 1, false, false);
return c;
#include "hardware/gpio.h"
static inline void pio_spi_init(PIO pio, uint sm, uint prog_offs, uint n_bits,
float clkdiv, bool cpha, bool cpol, uint pin_sck, uint pin_mosi, uint pin_miso) {
pio_sm_config c = cpha ? spi_cpha1_program_get_default_config(prog_offs) : spi_cpha0_program_get_default_config(prog_offs);
sm_config_set_out_pins(&c, pin_mosi, 1);
sm_config_set_in_pins(&c, pin_miso);
sm_config_set_sideset_pins(&c, pin_sck);
// Only support MSB-first in this example code (shift to left, auto push/pull, threshold=nbits)
sm_config_set_out_shift(&c, false, true, n_bits);
sm_config_set_in_shift(&c, false, true, n_bits);
sm_config_set_clkdiv(&c, clkdiv);
// MOSI, SCK output are low, MISO is input
pio_sm_set_pins_with_mask(pio, sm, 0, (1u << pin_sck) | (1u << pin_mosi));
pio_sm_set_pindirs_with_mask(pio, sm, (1u << pin_sck) | (1u << pin_mosi), (1u << pin_sck) | (1u << pin_mosi) | (1u << pin_miso));
pio_gpio_init(pio, pin_mosi);
pio_gpio_init(pio, pin_miso);
pio_gpio_init(pio, pin_sck);
// The pin muxes can be configured to invert the output (among other things
// and this is a cheesy way to get CPOL=1
gpio_set_outover(pin_sck, cpol ? GPIO_OVERRIDE_INVERT : GPIO_OVERRIDE_NORMAL);
// SPI is synchronous, so bypass input synchroniser to reduce input delay.
hw_set_bits(&pio->input_sync_bypass, 1u << pin_miso);
pio_sm_init(pio, sm, prog_offs, &c);
pio_sm_set_enabled(pio, sm, true);
// ------------ //
// spi_cpha0_cs //
// ------------ //
#define spi_cpha0_cs_wrap_target 0
#define spi_cpha0_cs_wrap 8
#define spi_cpha0_cs_offset_entry_point 8u
static const uint16_t spi_cpha0_cs_program_instructions[] = {
// .wrap_target
0x6101, // 0: out pins, 1 side 0 [1]
0x4801, // 1: in pins, 1 side 1
0x0840, // 2: jmp x--, 0 side 1
0x6001, // 3: out pins, 1 side 0
0xa022, // 4: mov x, y side 0
0x4801, // 5: in pins, 1 side 1
0x08e0, // 6: jmp !osre, 0 side 1
0xa142, // 7: nop side 0 [1]
0x91e0, // 8: pull ifempty block side 2 [1]
// .wrap
static const struct pio_program spi_cpha0_cs_program = {
.instructions = spi_cpha0_cs_program_instructions,
.length = 9,
.origin = -1,
static inline pio_sm_config spi_cpha0_cs_program_get_default_config(uint offset) {
pio_sm_config c = pio_get_default_sm_config();
sm_config_set_wrap(&c, offset + spi_cpha0_cs_wrap_target, offset + spi_cpha0_cs_wrap);
sm_config_set_sideset(&c, 2, false, false);
return c;
// ------------ //
// spi_cpha1_cs //
// ------------ //
#define spi_cpha1_cs_wrap_target 0
#define spi_cpha1_cs_wrap 8
#define spi_cpha1_cs_offset_entry_point 7u
static const uint16_t spi_cpha1_cs_program_instructions[] = {
// .wrap_target
0x6901, // 0: out pins, 1 side 1 [1]
0x4001, // 1: in pins, 1 side 0
0x0040, // 2: jmp x--, 0 side 0
0x6801, // 3: out pins, 1 side 1
0xa822, // 4: mov x, y side 1
0x4001, // 5: in pins, 1 side 0
0x00e0, // 6: jmp !osre, 0 side 0
0x91e0, // 7: pull ifempty block side 2 [1]
0xa142, // 8: nop side 0 [1]
// .wrap
static const struct pio_program spi_cpha1_cs_program = {
.instructions = spi_cpha1_cs_program_instructions,
.length = 9,
.origin = -1,
static inline pio_sm_config spi_cpha1_cs_program_get_default_config(uint offset) {
pio_sm_config c = pio_get_default_sm_config();
sm_config_set_wrap(&c, offset + spi_cpha1_cs_wrap_target, offset + spi_cpha1_cs_wrap);
sm_config_set_sideset(&c, 2, false, false);
return c;
#include "hardware/gpio.h"
static inline void pio_spi_cs_init(PIO pio, uint sm, uint prog_offs, uint n_bits, float clkdiv, bool cpha, bool cpol,
uint pin_sck, uint pin_mosi, uint pin_miso) {
pio_sm_config c = cpha ? spi_cpha1_cs_program_get_default_config(prog_offs) : spi_cpha0_cs_program_get_default_config(prog_offs);
sm_config_set_out_pins(&c, pin_mosi, 1);
sm_config_set_in_pins(&c, pin_miso);
sm_config_set_sideset_pins(&c, pin_sck);
sm_config_set_out_shift(&c, false, true, n_bits);
sm_config_set_in_shift(&c, false, true, n_bits);
sm_config_set_clkdiv(&c, clkdiv);
pio_sm_set_pins_with_mask(pio, sm, (2u << pin_sck), (3u << pin_sck) | (1u << pin_mosi));
pio_sm_set_pindirs_with_mask(pio, sm, (3u << pin_sck) | (1u << pin_mosi), (3u << pin_sck) | (1u << pin_mosi) | (1u << pin_miso));
pio_gpio_init(pio, pin_mosi);
pio_gpio_init(pio, pin_miso);
pio_gpio_init(pio, pin_sck);
pio_gpio_init(pio, pin_sck + 1);
gpio_set_outover(pin_sck, cpol ? GPIO_OVERRIDE_INVERT : GPIO_OVERRIDE_NORMAL);
hw_set_bits(&pio->input_sync_bypass, 1u << pin_miso);
uint entry_point = prog_offs + (cpha ? spi_cpha1_cs_offset_entry_point : spi_cpha0_cs_offset_entry_point);
pio_sm_init(pio, sm, entry_point, &c);
pio_sm_exec(pio, sm, pio_encode_set(pio_x, n_bits - 2));
pio_sm_exec(pio, sm, pio_encode_set(pio_y, n_bits - 2));
pio_sm_set_enabled(pio, sm, true);

@ -0,0 +1,13 @@
; Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
; SPDX-License-Identifier: BSD-3-Clause
.program squarewave
set pindirs, 1 ; Set pin to output
; Loop entry point: the pindirs setup above is executed once, then the
; program cycles between `again` and the `jmp` below.
again:
set pins, 1 [1] ; Drive pin high and then delay for one cycle
set pins, 0 ; Drive pin low
jmp again ; Set PC to label `again`

@ -0,0 +1,40 @@
// -------------------------------------------------- //
// This file is autogenerated by pioasm; do not edit! //
// -------------------------------------------------- //
#pragma once
#include "hardware/pio.h"
// ---------- //
// squarewave //
// ---------- //
#define squarewave_wrap_target 0
#define squarewave_wrap 3
static const uint16_t squarewave_program_instructions[] = {
// .wrap_target
0xe081, // 0: set pindirs, 1
0xe101, // 1: set pins, 1 [1]
0xe000, // 2: set pins, 0
0x0001, // 3: jmp 1
// .wrap
static const struct pio_program squarewave_program = {
.instructions = squarewave_program_instructions,
.length = 4,
.origin = -1,
static inline pio_sm_config squarewave_program_get_default_config(uint offset) {
pio_sm_config c = pio_get_default_sm_config();
sm_config_set_wrap(&c, offset + squarewave_wrap_target, offset + squarewave_wrap);
return c;

@ -0,0 +1,19 @@
; Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
; SPDX-License-Identifier: BSD-3-Clause
; Note that if you modify squarewave.c to include this program, you'll also
; need to set the wrap registers yourself. This would be handled for you by
; squarewave_program_get_default_config().
.program squarewave_fast
; Like squarewave_wrap, but remove the delay cycles so we can run twice as fast.
set pindirs, 1 ; Set pin to output
; Only the two `set pins` instructions are inside the wrap region, so the
; pindirs setup above is not repeated on each pass.
.wrap_target
set pins, 1 ; Drive pin high
set pins, 0 ; Drive pin low
.wrap

@ -0,0 +1,39 @@
// -------------------------------------------------- //
// This file is autogenerated by pioasm; do not edit! //
// -------------------------------------------------- //
#pragma once
#include "hardware/pio.h"
// --------------- //
// squarewave_fast //
// --------------- //
#define squarewave_fast_wrap_target 1
#define squarewave_fast_wrap 2
static const uint16_t squarewave_fast_program_instructions[] = {
0xe081, // 0: set pindirs, 1
// .wrap_target
0xe001, // 1: set pins, 1
0xe000, // 2: set pins, 0
// .wrap
static const struct pio_program squarewave_fast_program = {
.instructions = squarewave_fast_program_instructions,
.length = 3,
.origin = -1,
static inline pio_sm_config squarewave_fast_program_get_default_config(uint offset) {
pio_sm_config c = pio_get_default_sm_config();
sm_config_set_wrap(&c, offset + squarewave_fast_wrap_target, offset + squarewave_fast_wrap);
return c;

@ -0,0 +1,12 @@
; Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
; SPDX-License-Identifier: BSD-3-Clause
.program squarewave
set pindirs, 1 ; Set pin to output
again: set pins, 1 [1] ; Drive pin high and then delay for one cycle
set pins, 0 ; Drive pin low
jmp again ; Set PC to label `again`

@ -0,0 +1,19 @@
; Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
; SPDX-License-Identifier: BSD-3-Clause
; Note that if you modify squarewave.c to include this program, you'll also
; need to set the wrap registers yourself. This would be handled for you by
; squarewave_program_get_default_config().
.program squarewave_wrap
; Like squarewave, but use the state machine's .wrap hardware instead of an
; explicit jmp. This is a free (0-cycle) unconditional jump.
set pindirs, 1 ; Set pin to output
; Only the two `set pins` instructions are inside the wrap region, so the
; pindirs setup above is not repeated on each pass.
.wrap_target
set pins, 1 [1] ; Drive pin high and then delay for one cycle
set pins, 0 [1] ; Drive pin low and then delay for one cycle
.wrap

@ -0,0 +1,39 @@
// -------------------------------------------------- //
// This file is autogenerated by pioasm; do not edit! //
// -------------------------------------------------- //
#pragma once
#include "hardware/pio.h"
// --------------- //
// squarewave_wrap //
// --------------- //
#define squarewave_wrap_wrap_target 1
#define squarewave_wrap_wrap 2
static const uint16_t squarewave_wrap_program_instructions[] = {
0xe081, // 0: set pindirs, 1
// .wrap_target
0xe101, // 1: set pins, 1 [1]
0xe100, // 2: set pins, 0 [1]
// .wrap
static const struct pio_program squarewave_wrap_program = {
.instructions = squarewave_wrap_program_instructions,
.length = 3,
.origin = -1,
static inline pio_sm_config squarewave_wrap_program_get_default_config(uint offset) {
pio_sm_config c = pio_get_default_sm_config();
sm_config_set_wrap(&c, offset + squarewave_wrap_wrap_target, offset + squarewave_wrap_wrap);
return c;

@ -0,0 +1,57 @@
; Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
; SPDX-License-Identifier: BSD-3-Clause
.program st7789_lcd
.side_set 1
; This is just a simple clocked serial TX. At 125 MHz system clock we can
; sustain up to 62.5 Mbps.
; Data on OUT pin 0
; Clock on side-set pin 0
out pins, 1 side 0 ; stall here if no data (clock low)
nop side 1
% c-sdk {
// For optimal use of DMA bandwidth we would use an autopull threshold of 32,
// but we are using a threshold of 8 here (consume 1 byte from each FIFO entry
// and discard the remainder) to make things easier for software on the other side
static inline void st7789_lcd_program_init(PIO pio, uint sm, uint offset, uint data_pin, uint clk_pin, float clk_div) {
pio_gpio_init(pio, data_pin);
pio_gpio_init(pio, clk_pin);
pio_sm_set_consecutive_pindirs(pio, sm, data_pin, 1, true);
pio_sm_set_consecutive_pindirs(pio, sm, clk_pin, 1, true);
pio_sm_config c = st7789_lcd_program_get_default_config(offset);
sm_config_set_sideset_pins(&c, clk_pin);
sm_config_set_out_pins(&c, data_pin, 1);
sm_config_set_fifo_join(&c, PIO_FIFO_JOIN_TX);
sm_config_set_clkdiv(&c, clk_div);
sm_config_set_out_shift(&c, false, true, 8);
pio_sm_init(pio, sm, offset, &c);
pio_sm_set_enabled(pio, sm, true);
// Making use of the narrow store replication behaviour on RP2040 to get the
// data left-justified (as we are using shift-to-left to get MSB-first serial)
static inline void st7789_lcd_put(PIO pio, uint sm, uint8_t x) {
while (pio_sm_is_tx_fifo_full(pio, sm))
*(volatile uint8_t*)&pio->txf[sm] = x;
// SM is done when it stalls on an empty FIFO
static inline void st7789_lcd_wait_idle(PIO pio, uint sm) {
uint32_t sm_stall_mask = 1u << (sm + PIO_FDEBUG_TXSTALL_LSB);
pio->fdebug = sm_stall_mask;
while (!(pio->fdebug & sm_stall_mask))

@ -0,0 +1,72 @@
// -------------------------------------------------- //
// This file is autogenerated by pioasm; do not edit! //
// -------------------------------------------------- //
#pragma once
#include "hardware/pio.h"
// ---------- //
// st7789_lcd //
// ---------- //
#define st7789_lcd_wrap_target 0
#define st7789_lcd_wrap 1
static const uint16_t st7789_lcd_program_instructions[] = {
// .wrap_target
0x6001, // 0: out pins, 1 side 0
0xb042, // 1: nop side 1
// .wrap
static const struct pio_program st7789_lcd_program = {
.instructions = st7789_lcd_program_instructions,
.length = 2,
.origin = -1,
static inline pio_sm_config st7789_lcd_program_get_default_config(uint offset) {
pio_sm_config c = pio_get_default_sm_config();
sm_config_set_wrap(&c, offset + st7789_lcd_wrap_target, offset + st7789_lcd_wrap);
sm_config_set_sideset(&c, 1, false, false);
return c;
// For optimal use of DMA bandwidth we would use an autopull threshold of 32,
// but we are using a threshold of 8 here (consume 1 byte from each FIFO entry
// and discard the remainder) to make things easier for software on the other side
static inline void st7789_lcd_program_init(PIO pio, uint sm, uint offset, uint data_pin, uint clk_pin, float clk_div) {
pio_gpio_init(pio, data_pin);
pio_gpio_init(pio, clk_pin);
pio_sm_set_consecutive_pindirs(pio, sm, data_pin, 1, true);
pio_sm_set_consecutive_pindirs(pio, sm, clk_pin, 1, true);
pio_sm_config c = st7789_lcd_program_get_default_config(offset);
sm_config_set_sideset_pins(&c, clk_pin);
sm_config_set_out_pins(&c, data_pin, 1);
sm_config_set_fifo_join(&c, PIO_FIFO_JOIN_TX);
sm_config_set_clkdiv(&c, clk_div);
sm_config_set_out_shift(&c, false, true, 8);
pio_sm_init(pio, sm, offset, &c);
pio_sm_set_enabled(pio, sm, true);
// Making use of the narrow store replication behaviour on RP2040 to get the
// data left-justified (as we are using shift-to-left to get MSB-first serial)
static inline void st7789_lcd_put(PIO pio, uint sm, uint8_t x) {
while (pio_sm_is_tx_fifo_full(pio, sm))
*(volatile uint8_t*)&pio->txf[sm] = x;
// SM is done when it stalls on an empty FIFO
static inline void st7789_lcd_wait_idle(PIO pio, uint sm) {
uint32_t sm_stall_mask = 1u << (sm + PIO_FDEBUG_TXSTALL_LSB);
pio->fdebug = sm_stall_mask;
while (!(pio->fdebug & sm_stall_mask))

@ -0,0 +1,94 @@
; Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
; SPDX-License-Identifier: BSD-3-Clause
.program uart_rx_mini

; Minimum viable 8n1 UART receiver. Wait for the start bit, then sample 8 bits
; with the correct timing.
; IN pin 0 is mapped to the GPIO used as UART RX.
; Autopush must be enabled, with a threshold of 8.

    wait 0 pin 0        ; Wait for start bit
    set x, 7 [10]       ; Preload bit counter, delay until eye of first data bit
bitloop:                ; Loop 8 times
    in pins, 1          ; Sample data
    jmp x-- bitloop [6] ; Each iteration is 8 cycles

% c-sdk {
#include "hardware/clocks.h"
#include "hardware/gpio.h"

// Configure and start state machine `sm` of `pio` as a UART receiver on `pin`,
// running the uart_rx_mini program loaded at `offset` at `baud` bits/second.
static inline void uart_rx_mini_program_init(PIO pio, uint sm, uint offset, uint pin, uint baud) {
    // RX pin is an input.
    pio_sm_set_consecutive_pindirs(pio, sm, pin, 1, false);
    pio_gpio_init(pio, pin);

    pio_sm_config c = uart_rx_mini_program_get_default_config(offset);
    sm_config_set_in_pins(&c, pin); // for WAIT, IN
    // Shift to right, autopush enabled
    sm_config_set_in_shift(&c, true, true, 8);
    sm_config_set_fifo_join(&c, PIO_FIFO_JOIN_RX);
    // SM transmits 1 bit per 8 execution cycles.
    float div = (float)clock_get_hz(clk_sys) / (8 * baud);
    sm_config_set_clkdiv(&c, div);

    pio_sm_init(pio, sm, offset, &c);
    pio_sm_set_enabled(pio, sm, true);
}
%}
.program uart_rx

; Slightly more fleshed-out 8n1 UART receiver which handles framing errors and
; break conditions more gracefully.
; IN pin 0 and JMP pin are both mapped to the GPIO used as UART RX.

start:                  ; Program entry; `jmp start` below returns here
    wait 0 pin 0        ; Stall until start bit is asserted
    set x, 7    [10]    ; Preload bit counter, then delay until halfway through
bitloop:                ; the first data bit (12 cycles incl wait, set).
    in pins, 1          ; Shift data bit into ISR
    jmp x-- bitloop [6] ; Loop 8 times, each loop iteration is 8 cycles
    jmp pin good_stop   ; Check stop bit (should be high)

    irq 4 rel           ; Either a framing error or a break. Set a sticky flag,
    wait 1 pin 0        ; and wait for line to return to idle state.
    jmp start           ; Don't push data if we didn't see good framing.

good_stop:              ; No delay before returning to start; a little slack is
    push                ; important in case the TX clock is slightly too fast.

% c-sdk {
// Configure and start state machine `sm` of `pio` as a UART receiver on `pin`,
// running the uart_rx program loaded at `offset` at `baud` bits/second.
// `pin` is used both as the IN/WAIT pin and as the JMP pin.
static inline void uart_rx_program_init(PIO pio, uint sm, uint offset, uint pin, uint baud) {
    // RX pin is an input.
    pio_sm_set_consecutive_pindirs(pio, sm, pin, 1, false);
    pio_gpio_init(pio, pin);

    pio_sm_config c = uart_rx_program_get_default_config(offset);
    sm_config_set_in_pins(&c, pin); // for WAIT, IN
    sm_config_set_jmp_pin(&c, pin); // for JMP
    // Shift to right, autopush disabled
    sm_config_set_in_shift(&c, true, false, 32);
    // Deeper FIFO as we're not doing any TX
    sm_config_set_fifo_join(&c, PIO_FIFO_JOIN_RX);
    // SM transmits 1 bit per 8 execution cycles.
    float div = (float)clock_get_hz(clk_sys) / (8 * baud);
    sm_config_set_clkdiv(&c, div);

    pio_sm_init(pio, sm, offset, &c);
    pio_sm_set_enabled(pio, sm, true);
}

// Block until a character is available in the RX FIFO of `sm`, then return it.
static inline char uart_rx_program_getc(PIO pio, uint sm) {
    // 8-bit read from the uppermost byte of the FIFO, as data is left-justified
    io_rw_8 *rxfifo_shift = (io_rw_8*)&pio->rxf[sm] + 3;
    // Busy-wait: the read below must only happen once data has arrived.
    while (pio_sm_is_rx_fifo_empty(pio, sm))
        ;
    return (char)*rxfifo_shift;
}
%}

@ -0,0 +1,120 @@
// -------------------------------------------------- //
// This file is autogenerated by pioasm; do not edit! //
// -------------------------------------------------- //
#pragma once
#include "hardware/pio.h"
// ------------ //
// uart_rx_mini //
// ------------ //
#define uart_rx_mini_wrap_target 0
#define uart_rx_mini_wrap 3
static const uint16_t uart_rx_mini_program_instructions[] = {
// .wrap_target
0x2020, // 0: wait 0 pin, 0
0xea27, // 1: set x, 7 [10]
0x4001, // 2: in pins, 1
0x0642, // 3: jmp x--, 2 [6]
// .wrap
static const struct pio_program uart_rx_mini_program = {
.instructions = uart_rx_mini_program_instructions,
.length = 4,
.origin = -1,
static inline pio_sm_config uart_rx_mini_program_get_default_config(uint offset) {
pio_sm_config c = pio_get_default_sm_config();
sm_config_set_wrap(&c, offset + uart_rx_mini_wrap_target, offset + uart_rx_mini_wrap);
return c;
#include "hardware/clocks.h"
#include "hardware/gpio.h"
static inline void uart_rx_mini_program_init(PIO pio, uint sm, uint offset, uint pin, uint baud) {
pio_sm_set_consecutive_pindirs(pio, sm, pin, 1, false);
pio_gpio_init(pio, pin);
pio_sm_config c = uart_rx_mini_program_get_default_config(offset);
sm_config_set_in_pins(&c, pin); // for WAIT, IN
// Shift to right, autopush enabled
sm_config_set_in_shift(&c, true, true, 8);
sm_config_set_fifo_join(&c, PIO_FIFO_JOIN_RX);
// SM transmits 1 bit per 8 execution cycles.
float div = (float)clock_get_hz(clk_sys) / (8 * baud);
sm_config_set_clkdiv(&c, div);
pio_sm_init(pio, sm, offset, &c);
pio_sm_set_enabled(pio, sm, true);
// ------- //
// uart_rx //
// ------- //
#define uart_rx_wrap_target 0
#define uart_rx_wrap 8
static const uint16_t uart_rx_program_instructions[] = {
// .wrap_target
0x2020, // 0: wait 0 pin, 0
0xea27, // 1: set x, 7 [10]
0x4001, // 2: in pins, 1
0x0642, // 3: jmp x--, 2 [6]
0x00c8, // 4: jmp pin, 8
0xc014, // 5: irq nowait 4 rel
0x20a0, // 6: wait 1 pin, 0
0x0000, // 7: jmp 0
0x8020, // 8: push block
// .wrap
static const struct pio_program uart_rx_program = {
.instructions = uart_rx_program_instructions,
.length = 9,
.origin = -1,
static inline pio_sm_config uart_rx_program_get_default_config(uint offset) {
pio_sm_config c = pio_get_default_sm_config();
sm_config_set_wrap(&c, offset + uart_rx_wrap_target, offset + uart_rx_wrap);
return c;
static inline void uart_rx_program_init(PIO pio, uint sm, uint offset, uint pin, uint baud) {
pio_sm_set_consecutive_pindirs(pio, sm, pin, 1, false);
pio_gpio_init(pio, pin);
pio_sm_config c = uart_rx_program_get_default_config(offset);
sm_config_set_in_pins(&c, pin); // for WAIT, IN
sm_config_set_jmp_pin(&c, pin); // for JMP
// Shift to right, autopush disabled
sm_config_set_in_shift(&c, true, false, 32);
// Deeper FIFO as we're not doing any TX
sm_config_set_fifo_join(&c, PIO_FIFO_JOIN_RX);
// SM transmits 1 bit per 8 execution cycles.
float div = (float)clock_get_hz(clk_sys) / (8 * baud);
sm_config_set_clkdiv(&c, div);
pio_sm_init(pio, sm, offset, &c);
pio_sm_set_enabled(pio, sm, true);
static inline char uart_rx_program_getc(PIO pio, uint sm) {
// 8-bit read from the uppermost byte of the FIFO, as data is left-justified
io_rw_8 *rxfifo_shift = (io_rw_8*)&pio->rxf[sm] + 3;
while (pio_sm_is_rx_fifo_empty(pio, sm))
return (char)*rxfifo_shift;

@ -0,0 +1,61 @@
; Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
; SPDX-License-Identifier: BSD-3-Clause
.program uart_tx
.side_set 1 opt

; An 8n1 UART transmit program.
; OUT pin 0 and side-set pin 0 are both mapped to UART TX pin.

    pull       side 1 [7]  ; Assert stop bit, or stall with line in idle state
    set x, 7   side 0 [7]  ; Preload bit counter, assert start bit for 8 clocks
bitloop:                   ; This loop will run 8 times (8n1 UART)
    out pins, 1            ; Shift 1 bit from OSR to the first OUT pin
    jmp x-- bitloop   [6]  ; Each loop iteration is 8 cycles.

% c-sdk {
#include "hardware/clocks.h"

// Configure and start state machine `sm` of `pio` as a UART transmitter on
// `pin_tx`, running the uart_tx program loaded at `offset` at `baud` bits/second.
static inline void uart_tx_program_init(PIO pio, uint sm, uint offset, uint pin_tx, uint baud) {
    // Tell PIO to initially drive output-high on the selected pin, then map PIO
    // onto that pin with the IO muxes.
    pio_sm_set_pins_with_mask(pio, sm, 1u << pin_tx, 1u << pin_tx);
    pio_sm_set_pindirs_with_mask(pio, sm, 1u << pin_tx, 1u << pin_tx);
    pio_gpio_init(pio, pin_tx);

    pio_sm_config c = uart_tx_program_get_default_config(offset);

    // OUT shifts to right, no autopull
    sm_config_set_out_shift(&c, true, false, 32);

    // We are mapping both OUT and side-set to the same pin, because sometimes
    // we need to assert user data onto the pin (with OUT) and sometimes
    // assert constant values (start/stop bit)
    sm_config_set_out_pins(&c, pin_tx, 1);
    sm_config_set_sideset_pins(&c, pin_tx);

    // We only need TX, so get an 8-deep FIFO!
    sm_config_set_fifo_join(&c, PIO_FIFO_JOIN_TX);

    // SM transmits 1 bit per 8 execution cycles.
    float div = (float)clock_get_hz(clk_sys) / (8 * baud);
    sm_config_set_clkdiv(&c, div);

    pio_sm_init(pio, sm, offset, &c);
    pio_sm_set_enabled(pio, sm, true);
}

// Queue one character for transmission, blocking while the TX FIFO is full.
static inline void uart_tx_program_putc(PIO pio, uint sm, char c) {
    pio_sm_put_blocking(pio, sm, (uint32_t)c);
}

// Transmit every character of the NUL-terminated string `s`, in order.
static inline void uart_tx_program_puts(PIO pio, uint sm, const char *s) {
    while (*s)
        uart_tx_program_putc(pio, sm, *s++);
}
%}

@ -0,0 +1,73 @@
// -------------------------------------------------- //
// This file is autogenerated by pioasm; do not edit! //
// -------------------------------------------------- //
#pragma once
#include "hardware/pio.h"
// ------- //
// uart_tx //
// ------- //
#define uart_tx_wrap_target 0
#define uart_tx_wrap 3
static const uint16_t uart_tx_program_instructions[] = {
// .wrap_target
0x9fa0, // 0: pull block side 1 [7]
0xf727, // 1: set x, 7 side 0 [7]
0x6001, // 2: out pins, 1
0x0642, // 3: jmp x--, 2 [6]
// .wrap
static const struct pio_program uart_tx_program = {
.instructions = uart_tx_program_instructions,
.length = 4,
.origin = -1,
static inline pio_sm_config uart_tx_program_get_default_config(uint offset) {
pio_sm_config c = pio_get_default_sm_config();
sm_config_set_wrap(&c, offset + uart_tx_wrap_target, offset + uart_tx_wrap);
sm_config_set_sideset(&c, 2, true, false);
return c;
#include "hardware/clocks.h"
static inline void uart_tx_program_init(PIO pio, uint sm, uint offset, uint pin_tx, uint baud) {
// Tell PIO to initially drive output-high on the selected pin, then map PIO
// onto that pin with the IO muxes.
pio_sm_set_pins_with_mask(pio, sm, 1u << pin_tx, 1u << pin_tx);
pio_sm_set_pindirs_with_mask(pio, sm, 1u << pin_tx, 1u << pin_tx);
pio_gpio_init(pio, pin_tx);
pio_sm_config c = uart_tx_program_get_default_config(offset);
// OUT shifts to right, no autopull
sm_config_set_out_shift(&c, true, false, 32);
// We are mapping both OUT and side-set to the same pin, because sometimes
// we need to assert user data onto the pin (with OUT) and sometimes
// assert constant values (start/stop bit)
sm_config_set_out_pins(&c, pin_tx, 1);
sm_config_set_sideset_pins(&c, pin_tx);
// We only need TX, so get an 8-deep FIFO!
sm_config_set_fifo_join(&c, PIO_FIFO_JOIN_TX);
// SM transmits 1 bit per 8 execution cycles.
float div = (float)clock_get_hz(clk_sys) / (8 * baud);
sm_config_set_clkdiv(&c, div);
pio_sm_init(pio, sm, offset, &c);
pio_sm_set_enabled(pio, sm, true);
static inline void uart_tx_program_putc(PIO pio, uint sm, char c) {
pio_sm_put_blocking(pio, sm, (uint32_t)c);
static inline void uart_tx_program_puts(PIO pio, uint sm, const char *s) {
while (*s)
uart_tx_program_putc(pio, sm, *s++);

@ -0,0 +1,85 @@
; Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
; SPDX-License-Identifier: BSD-3-Clause
.program ws2812
.side_set 1

; Serial bit encoder driven from the OSR. Every bit takes T1 + T2 + T3 cycles:
; the side-set pin is low for T3, high for T1, and then either stays high for
; T2 (bit is one) or goes low for T2 (bit is zero).

.define public T1 2
.define public T2 5
.define public T3 3

.lang_opt python sideset_init = pico.PIO.OUT_HIGH
.lang_opt python out_init     = pico.PIO.OUT_HIGH
.lang_opt python out_shiftdir = 1

bitloop:
    out x, 1       side 0 [T3 - 1] ; Side-set still takes place when instruction stalls
    jmp !x do_zero side 1 [T1 - 1] ; Branch on the bit we shifted out. Positive pulse
    jmp bitloop    side 1 [T2 - 1] ; Continue driving high, for a long pulse
do_zero:
    nop            side 0 [T2 - 1] ; Or drive low, for a short pulse

% c-sdk {
#include "hardware/clocks.h"

// Configure and start state machine `sm` of `pio` to run the ws2812 program
// loaded at `offset`, with `pin` as the side-set pin. `freq` is the bit rate
// in Hz; `rgbw` selects a 32-bit autopull threshold instead of 24.
static inline void ws2812_program_init(PIO pio, uint sm, uint offset, uint pin, float freq, bool rgbw) {
    pio_gpio_init(pio, pin);
    pio_sm_set_consecutive_pindirs(pio, sm, pin, 1, true);

    pio_sm_config c = ws2812_program_get_default_config(offset);
    sm_config_set_sideset_pins(&c, pin);
    sm_config_set_out_shift(&c, false, true, rgbw ? 32 : 24);
    sm_config_set_fifo_join(&c, PIO_FIFO_JOIN_TX);

    // One bit spans T1 + T2 + T3 SM cycles, so scale the clock accordingly.
    int cycles_per_bit = ws2812_T1 + ws2812_T2 + ws2812_T3;
    float div = clock_get_hz(clk_sys) / (freq * cycles_per_bit);
    sm_config_set_clkdiv(&c, div);

    pio_sm_init(pio, sm, offset, &c);
    pio_sm_set_enabled(pio, sm, true);
}
%}
.program ws2812_parallel

; Parallel variant: each 32-bit word taken from the OSR is written to the
; mapped pins as one bit per pin. A bit period is T1 + T2 + T3 cycles: all
; pins high for T1, the data word for T2, then all pins low for the remainder
; (the `out` itself accounts for one of the T3 cycles).

.define public T1 2
.define public T2 5
.define public T3 3

    out x, 32
    mov pins, !null [T1-1]
    mov pins, x     [T2-1]
    mov pins, null  [T3-2]

% c-sdk {
#include "hardware/clocks.h"

// Configure and start state machine `sm` of `pio` to run the ws2812_parallel
// program loaded at `offset`, driving `pin_count` consecutive pins starting
// at `pin_base`. `freq` is the bit rate in Hz.
static inline void ws2812_parallel_program_init(PIO pio, uint sm, uint offset, uint pin_base, uint pin_count, float freq) {
    // Hand every pin in the range over to the PIO block.
    for(uint i=pin_base; i<pin_base+pin_count; i++) {
        pio_gpio_init(pio, i);
    }
    pio_sm_set_consecutive_pindirs(pio, sm, pin_base, pin_count, true);

    pio_sm_config c = ws2812_parallel_program_get_default_config(offset);
    sm_config_set_out_shift(&c, true, true, 32);
    sm_config_set_out_pins(&c, pin_base, pin_count);
    sm_config_set_set_pins(&c, pin_base, pin_count);
    sm_config_set_fifo_join(&c, PIO_FIFO_JOIN_TX);

    // One bit spans T1 + T2 + T3 SM cycles, so scale the clock accordingly.
    int cycles_per_bit = ws2812_parallel_T1 + ws2812_parallel_T2 + ws2812_parallel_T3;
    float div = clock_get_hz(clk_sys) / (freq * cycles_per_bit);
    sm_config_set_clkdiv(&c, div);

    pio_sm_init(pio, sm, offset, &c);
    pio_sm_set_enabled(pio, sm, true);
}
%}

@ -0,0 +1,114 @@
// -------------------------------------------------- //
// This file is autogenerated by pioasm; do not edit! //
// -------------------------------------------------- //
#pragma once
#include "hardware/pio.h"
// ------ //
// ws2812 //
// ------ //
#define ws2812_wrap_target 0
#define ws2812_wrap 3
#define ws2812_T1 2
#define ws2812_T2 5
#define ws2812_T3 3
static const uint16_t ws2812_program_instructions[] = {
// .wrap_target
0x6221, // 0: out x, 1 side 0 [2]
0x1123, // 1: jmp !x, 3 side 1 [1]
0x1400, // 2: jmp 0 side 1 [4]
0xa442, // 3: nop side 0 [4]
// .wrap
static const struct pio_program ws2812_program = {
.instructions = ws2812_program_instructions,
.length = 4,
.origin = -1,
static inline pio_sm_config ws2812_program_get_default_config(uint offset) {
pio_sm_config c = pio_get_default_sm_config();
sm_config_set_wrap(&c, offset + ws2812_wrap_target, offset + ws2812_wrap);
sm_config_set_sideset(&c, 1, false, false);
return c;
#include "hardware/clocks.h"
static inline void ws2812_program_init(PIO pio, uint sm, uint offset, uint pin, float freq, bool rgbw) {
pio_gpio_init(pio, pin);
pio_sm_set_consecutive_pindirs(pio, sm, pin, 1, true);
pio_sm_config c = ws2812_program_get_default_config(offset);
sm_config_set_sideset_pins(&c, pin);
sm_config_set_out_shift(&c, false, true, rgbw ? 32 : 24);
sm_config_set_fifo_join(&c, PIO_FIFO_JOIN_TX);
int cycles_per_bit = ws2812_T1 + ws2812_T2 + ws2812_T3;
float div = clock_get_hz(clk_sys) / (freq * cycles_per_bit);
sm_config_set_clkdiv(&c, div);
pio_sm_init(pio, sm, offset, &c);
pio_sm_set_enabled(pio, sm, true);
// --------------- //
// ws2812_parallel //
// --------------- //
#define ws2812_parallel_wrap_target 0
#define ws2812_parallel_wrap 3
#define ws2812_parallel_T1 2
#define ws2812_parallel_T2 5
#define ws2812_parallel_T3 3
static const uint16_t ws2812_parallel_program_instructions[] = {
// .wrap_target
0x6020, // 0: out x, 32
0xa10b, // 1: mov pins, !null [1]
0xa401, // 2: mov pins, x [4]
0xa103, // 3: mov pins, null [1]
// .wrap
static const struct pio_program ws2812_parallel_program = {
.instructions = ws2812_parallel_program_instructions,
.length = 4,
.origin = -1,
static inline pio_sm_config ws2812_parallel_program_get_default_config(uint offset) {
pio_sm_config c = pio_get_default_sm_config();
sm_config_set_wrap(&c, offset + ws2812_parallel_wrap_target, offset + ws2812_parallel_wrap);
return c;
#include "hardware/clocks.h"
static inline void ws2812_parallel_program_init(PIO pio, uint sm, uint offset, uint pin_base, uint pin_count, float freq) {
for(uint i=pin_base; i<pin_base+pin_count; i++) {
pio_gpio_init(pio, i);
pio_sm_set_consecutive_pindirs(pio, sm, pin_base, pin_count, true);
pio_sm_config c = ws2812_parallel_program_get_default_config(offset);
sm_config_set_out_shift(&c, true, true, 32);
sm_config_set_out_pins(&c, pin_base, pin_count);
sm_config_set_set_pins(&c, pin_base, pin_count);
sm_config_set_fifo_join(&c, PIO_FIFO_JOIN_TX);
int cycles_per_bit = ws2812_parallel_T1 + ws2812_parallel_T2 + ws2812_parallel_T3;
float div = clock_get_hz(clk_sys) / (freq * cycles_per_bit);
sm_config_set_clkdiv(&c, div);
pio_sm_init(pio, sm, offset, &c);
pio_sm_set_enabled(pio, sm, true);

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff