PreProcessor rewritten: ready for script blocks, no tokenization, and emits the Kind of each output line (Source, Assembler, or Script)

This commit is contained in:
Mattias Hansson 2025-10-17 16:59:41 +02:00
parent 8facce2593
commit ce45f71b99
2 changed files with 404 additions and 155 deletions

View file

@ -3,17 +3,24 @@ package preproc
import (
"fmt"
"path/filepath"
//"path/filepath"
"strings"
)
// LineKind classifies an emitted Line by the kind of block it was read from.
type LineKind int

// Kinds of lines produced by the preprocessor. Source is the zero value, so a
// Line built without an explicit Kind is reported as Source.
const (
	Source    LineKind = iota // ordinary line: defines expanded, // comments stripped
	Assembler                 // line inside an ASM/ENDASM block, emitted verbatim
	Script                    // line inside a SCRIPT/ENDSCRIPT block, emitted verbatim
)

// String returns the name of the kind so that %v and %s print "Source",
// "Assembler" or "Script" rather than a bare integer. Values outside the
// declared range are rendered as "LineKind(n)".
func (k LineKind) String() string {
	switch k {
	case Source:
		return "Source"
	case Assembler:
		return "Assembler"
	case Script:
		return "Script"
	default:
		return fmt.Sprintf("LineKind(%d)", int(k))
	}
}
// Line represents one post-processed source line and its provenance.
type Line struct {
Text string // post-preprocessor line text (after define replacement)
RawText string // original line text before any processing
Text string // post-preprocessor line text (after define replacement, comment stripping)
Filename string // file the line came from (after resolving includes)
LineNo int // 1-based line number in Filename
Tokens []string // whitespace-split tokens from Text (space or tab; consecutive collapsed)
Kind LineKind // Source, Assembler, or Script
PragmaSetIndex int // index into Pragma stack for this line
}
@ -42,6 +49,7 @@ type preproc struct {
pragma *Pragma // pragma handler
cond []bool // conditional stack; a line is active if all are true
inAsm bool // true when inside ASM/ENDASM block
inScript bool // true when inside SCRIPT/ENDSCRIPT block
reader FileReader // file reader abstraction
}
@ -51,6 +59,7 @@ func newPreproc(reader FileReader) *preproc {
pragma: NewPragma(),
cond: []bool{},
inAsm: false,
inScript: false,
reader: reader,
}
}
@ -106,39 +115,48 @@ func (p *preproc) run(root string) ([]Line, error) {
tokens := strings.Fields(raw)
// ASM mode handling
if !p.inAsm {
if !p.inAsm && !p.inScript {
// Check for ASM entry
if includeSource && len(tokens) > 0 && tokens[0] == "ASM" {
p.inAsm = true
out = append(out, Line{
Text: raw,
Filename: currFrame.path,
LineNo: currFrame.line,
Tokens: []string{},
PragmaSetIndex: p.pragma.GetCurrentPragmaSetIndex(),
})
continue
continue // don't emit ASM marker
}
} else {
// Check for SCRIPT entry
if includeSource && len(tokens) > 0 && tokens[0] == "SCRIPT" {
p.inScript = true
continue // don't emit SCRIPT marker
}
} else if p.inAsm {
// We're in ASM mode
// Check for ENDASM
if len(tokens) > 0 && tokens[0] == "ENDASM" {
p.inAsm = false
continue // don't emit ENDASM marker
}
// Otherwise emit line verbatim as Assembler
out = append(out, Line{
RawText: raw,
Text: raw,
Filename: currFrame.path,
LineNo: currFrame.line,
Tokens: []string{},
Kind: Assembler,
PragmaSetIndex: p.pragma.GetCurrentPragmaSetIndex(),
})
continue
} else if p.inScript {
// We're in SCRIPT mode
// Check for ENDSCRIPT
if len(tokens) > 0 && tokens[0] == "ENDSCRIPT" {
p.inScript = false
continue // don't emit ENDSCRIPT marker
}
// Otherwise emit line verbatim
// Otherwise emit line verbatim as Script
out = append(out, Line{
RawText: raw,
Text: raw,
Filename: currFrame.path,
LineNo: currFrame.line,
Tokens: []string{},
Kind: Script,
PragmaSetIndex: p.pragma.GetCurrentPragmaSetIndex(),
})
continue
@ -239,13 +257,21 @@ func (p *preproc) run(root string) ([]Line, error) {
continue
}
// Non-directive: expand defines and emit.
// Non-directive Source line: expand defines, strip comments, emit
text := p.defs.ReplaceDefines(raw)
// Strip comments (everything after //)
if idx := strings.Index(text, "//"); idx >= 0 {
text = text[:idx]
}
text = strings.TrimRight(text, " \t")
out = append(out, Line{
RawText: raw,
Text: text,
Filename: currFrame.path,
LineNo: currFrame.line,
Tokens: strings.Fields(text),
Kind: Source,
PragmaSetIndex: p.pragma.GetCurrentPragmaSetIndex(),
})
}

View file

@ -26,6 +26,10 @@ func TestPreProcess_BasicDefine(t *testing.T) {
if lines[0].Text != "LDA #42" {
t.Errorf("expected 'LDA #42', got %q", lines[0].Text)
}
if lines[0].Kind != Source {
t.Errorf("expected Kind=Source, got %v", lines[0].Kind)
}
}
func TestPreProcess_DefineExpansion(t *testing.T) {
@ -72,7 +76,6 @@ func TestPreProcess_IncludeGuard(t *testing.T) {
t.Fatalf("PreProcess failed: %v", err)
}
// First include emits LABEL, second include is blocked by guard
if len(lines) != 2 {
t.Fatalf("expected 2 lines, got %d", len(lines))
}
@ -102,18 +105,254 @@ func TestPreProcess_ASMBlock(t *testing.T) {
t.Fatalf("PreProcess failed: %v", err)
}
if len(lines) != 5 {
t.Fatalf("expected 5 lines, got %d", len(lines))
// ASM and ENDASM markers are stripped, so only 2 asm lines + 1 source line
if len(lines) != 3 {
t.Fatalf("expected 3 lines, got %d", len(lines))
}
// ASM content should NOT be processed
if lines[1].Text != " lda #FOO" {
t.Errorf("expected ' lda #FOO', got %q", lines[1].Text)
if lines[0].Text != " lda #FOO" {
t.Errorf("expected ' lda #FOO', got %q", lines[0].Text)
}
if lines[0].Kind != Assembler {
t.Errorf("expected Kind=Assembler, got %v", lines[0].Kind)
}
if lines[1].Text != " sta $d020" {
t.Errorf("expected ' sta $d020', got %q", lines[1].Text)
}
if lines[1].Kind != Assembler {
t.Errorf("expected Kind=Assembler, got %v", lines[1].Kind)
}
// After ENDASM, defines work again
if lines[4].Text != "LDA #42" {
t.Errorf("expected 'LDA #42', got %q", lines[4].Text)
if lines[2].Text != "LDA #42" {
t.Errorf("expected 'LDA #42', got %q", lines[2].Text)
}
if lines[2].Kind != Source {
t.Errorf("expected Kind=Source, got %v", lines[2].Kind)
}
}
// TestPreProcess_ScriptBlock checks that the lines between SCRIPT and
// ENDSCRIPT come out verbatim with Kind=Script (no define expansion), that
// the markers themselves are not emitted, and that define expansion applies
// again to the Source line following the block.
func TestPreProcess_ScriptBlock(t *testing.T) {
	reader := NewMockFileReader(map[string][]string{
		"test.c65": {
			"#DEFINE VAR = 100",
			"SCRIPT",
			" x = VAR + 1",
			" print(x)",
			"ENDSCRIPT",
			"LDA #VAR",
		},
	})

	got, err := PreProcess("test.c65", reader)
	if err != nil {
		t.Fatalf("PreProcess failed: %v", err)
	}

	// Two script body lines plus the trailing source line; the directive and
	// both block markers are expected to produce no output.
	if n := len(got); n != 3 {
		t.Fatalf("expected 3 lines, got %d", n)
	}
	firstScript, secondScript, afterBlock := got[0], got[1], got[2]

	// Inside the block VAR must stay unexpanded.
	if firstScript.Text != " x = VAR + 1" {
		t.Errorf("expected ' x = VAR + 1', got %q", firstScript.Text)
	}
	if firstScript.Kind != Script {
		t.Errorf("expected Kind=Script, got %v", firstScript.Kind)
	}

	if secondScript.Text != " print(x)" {
		t.Errorf("expected ' print(x)', got %q", secondScript.Text)
	}
	if secondScript.Kind != Script {
		t.Errorf("expected Kind=Script, got %v", secondScript.Kind)
	}

	// Past ENDSCRIPT the define is substituted again.
	if afterBlock.Text != "LDA #100" {
		t.Errorf("expected 'LDA #100', got %q", afterBlock.Text)
	}
	if afterBlock.Kind != Source {
		t.Errorf("expected Kind=Source, got %v", afterBlock.Kind)
	}
}
// TestPreProcess_CommentStripping checks that a trailing // comment (and the
// whitespace before it) is removed from Text on Source lines, while RawText
// keeps the line exactly as written.
func TestPreProcess_CommentStripping(t *testing.T) {
	reader := NewMockFileReader(map[string][]string{
		"test.c65": {
			"LDA #42 // load accumulator",
			"STA $D020 // border color",
			"NOP // comment only",
			"JMP $0810",
		},
	})

	out, err := PreProcess("test.c65", reader)
	if err != nil {
		t.Fatalf("PreProcess failed: %v", err)
	}
	if n := len(out); n != 4 {
		t.Fatalf("expected 4 lines, got %d", n)
	}

	// First line: stripped Text, untouched RawText.
	if text := out[0].Text; text != "LDA #42" {
		t.Errorf("expected 'LDA #42', got %q", text)
	}
	if rawText := out[0].RawText; rawText != "LDA #42 // load accumulator" {
		t.Errorf("expected RawText to preserve comment, got %q", rawText)
	}

	if text := out[1].Text; text != "STA $D020" {
		t.Errorf("expected 'STA $D020', got %q", text)
	}
	if text := out[2].Text; text != "NOP" {
		t.Errorf("expected 'NOP', got %q", text)
	}
	// A line without a comment passes through unchanged.
	if text := out[3].Text; text != "JMP $0810" {
		t.Errorf("expected 'JMP $0810', got %q", text)
	}
}
// TestPreProcess_CommentInASMBlock checks that // comments on lines inside an
// ASM/ENDASM block are left in Text rather than stripped.
func TestPreProcess_CommentInASMBlock(t *testing.T) {
	reader := NewMockFileReader(map[string][]string{
		"test.c65": {
			"ASM",
			" lda #42 // this comment stays",
			" sta $d020 // this too",
			"ENDASM",
		},
	})

	out, err := PreProcess("test.c65", reader)
	if err != nil {
		t.Fatalf("PreProcess failed: %v", err)
	}

	// The body lines, in order, exactly as they appear in the input.
	wantText := []string{
		" lda #42 // this comment stays",
		" sta $d020 // this too",
	}
	if n := len(out); n != 2 {
		t.Fatalf("expected 2 lines, got %d", n)
	}
	for i, want := range wantText {
		if out[i].Text != want {
			t.Errorf("expected comment preserved, got %q", out[i].Text)
		}
	}
}
// TestPreProcess_CommentInScriptBlock checks that // comments on lines inside
// a SCRIPT/ENDSCRIPT block are left in Text rather than stripped.
func TestPreProcess_CommentInScriptBlock(t *testing.T) {
	reader := NewMockFileReader(map[string][]string{
		"test.c65": {
			"SCRIPT",
			" x = 1 // script comment",
			" y = 2 // another one",
			"ENDSCRIPT",
		},
	})

	out, err := PreProcess("test.c65", reader)
	if err != nil {
		t.Fatalf("PreProcess failed: %v", err)
	}

	// The body lines, in order, exactly as they appear in the input.
	wantText := []string{
		" x = 1 // script comment",
		" y = 2 // another one",
	}
	if n := len(out); n != 2 {
		t.Fatalf("expected 2 lines, got %d", n)
	}
	for i, want := range wantText {
		if out[i].Text != want {
			t.Errorf("expected comment preserved, got %q", out[i].Text)
		}
	}
}
// TestPreProcess_RawTextPreservation checks that, for a Source line, RawText
// holds the unmodified input while Text holds the result after define
// expansion and comment stripping.
func TestPreProcess_RawTextPreservation(t *testing.T) {
	reader := NewMockFileReader(map[string][]string{
		"test.c65": {
			"#DEFINE FOO = 42",
			"LDA #FOO // comment here",
		},
	})

	out, err := PreProcess("test.c65", reader)
	if err != nil {
		t.Fatalf("PreProcess failed: %v", err)
	}
	if n := len(out); n != 1 {
		t.Fatalf("expected 1 line, got %d", n)
	}
	only := out[0]

	// Untouched original.
	if only.RawText != "LDA #FOO // comment here" {
		t.Errorf("expected RawText 'LDA #FOO // comment here', got %q", only.RawText)
	}
	// Processed form.
	if only.Text != "LDA #42" {
		t.Errorf("expected Text 'LDA #42', got %q", only.Text)
	}
}
func TestPreProcess_MismatchedBlockTerminators(t *testing.T) {
tests := []struct {
name string
lines []string
}{
{
name: "ASM ended with ENDSCRIPT",
lines: []string{
"ASM",
" lda #42",
"ENDSCRIPT",
"NOP",
},
},
{
name: "SCRIPT ended with ENDASM",
lines: []string{
"SCRIPT",
" x = 1",
"ENDASM",
"NOP",
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
files := map[string][]string{
"test.c65": tt.lines,
}
reader := NewMockFileReader(files)
lines, err := PreProcess("test.c65", reader)
if err != nil {
t.Fatalf("PreProcess failed: %v", err)
}
// Wrong terminator won't close the block
// All lines including the wrong terminator should be in the block
if len(lines) < 2 {
t.Errorf("expected at least 2 lines, got %d", len(lines))
}
// The mismatched terminator should be treated as block content
// and NOP should still be in the block too
for _, line := range lines {
if line.Kind == Source {
t.Errorf("found Source line when all should be in block: %q", line.Text)
}
}
})
}
}
@ -295,7 +534,6 @@ func TestPreProcess_PragmaWithDefines(t *testing.T) {
t.Fatalf("PreProcess failed: %v", err)
}
// Pragma should have been processed with define expansion
if len(lines) != 1 {
t.Fatalf("expected 1 line, got %d", len(lines))
}
@ -368,63 +606,6 @@ func TestPreProcess_ConditionalHalt(t *testing.T) {
}
}
func TestPreProcess_Tokens(t *testing.T) {
files := map[string][]string{
"test.c65": {
"LDA #$42",
" STA $D020 ",
},
}
reader := NewMockFileReader(files)
lines, err := PreProcess("test.c65", reader)
if err != nil {
t.Fatalf("PreProcess failed: %v", err)
}
expectedTokens := [][]string{
{"LDA", "#$42"},
{"STA", "$D020"},
}
for i, expected := range expectedTokens {
if len(lines[i].Tokens) != len(expected) {
t.Errorf("line %d: expected %d tokens, got %d", i, len(expected), len(lines[i].Tokens))
continue
}
for j, tok := range expected {
if lines[i].Tokens[j] != tok {
t.Errorf("line %d token %d: expected %q, got %q", i, j, tok, lines[i].Tokens[j])
}
}
}
}
func TestPreProcess_ASMTokens(t *testing.T) {
files := map[string][]string{
"test.c65": {
"ASM",
" lda #$42",
"ENDASM",
},
}
reader := NewMockFileReader(files)
lines, err := PreProcess("test.c65", reader)
if err != nil {
t.Fatalf("PreProcess failed: %v", err)
}
// ASM and ENDASM have empty token arrays
if len(lines[0].Tokens) != 0 {
t.Errorf("ASM should have empty tokens, got %d", len(lines[0].Tokens))
}
if len(lines[1].Tokens) != 0 {
t.Errorf("ASM content should have empty tokens, got %d", len(lines[1].Tokens))
}
if len(lines[2].Tokens) != 0 {
t.Errorf("ENDASM should have empty tokens, got %d", len(lines[2].Tokens))
}
}
func TestPreProcess_ComplexIncludeGuard(t *testing.T) {
files := map[string][]string{
"c64scr.c65": {
@ -452,7 +633,6 @@ func TestPreProcess_ComplexIncludeGuard(t *testing.T) {
}
// First include should emit the library, second should be blocked
// Count non-directive lines from first include
count := 0
for _, line := range lines {
if strings.Contains(line.Text, "lda $d011") {
@ -461,7 +641,7 @@ func TestPreProcess_ComplexIncludeGuard(t *testing.T) {
}
if count != 1 {
t.Errorf("expected 1 lib lines from multiple includes of same file, got %d", count)
t.Errorf("expected 1 occurrence from multiple includes of same file, got %d", count)
}
}
@ -535,52 +715,6 @@ func TestPreProcess_FilenameAndLineNumberTracking(t *testing.T) {
}
}
func TestPreProcess_Tokenization(t *testing.T) {
files := map[string][]string{
"test.c65": {
"LDA #$42", // simple tokens
" STA $D020 ", // leading/trailing whitespace, multiple spaces
"LET var = $1000", // multiple tokens with =
" JMP $0810", // tabs
" CALL func ( a b c )", // spaces around everything
"", // empty line
"NOP", // single token
},
}
reader := NewMockFileReader(files)
lines, err := PreProcess("test.c65", reader)
if err != nil {
t.Fatalf("PreProcess failed: %v", err)
}
expected := [][]string{
{"LDA", "#$42"},
{"STA", "$D020"},
{"LET", "var", "=", "$1000"},
{"JMP", "$0810"},
{"CALL", "func", "(", "a", "b", "c", ")"},
{}, // empty line has no tokens
{"NOP"},
}
if len(lines) != len(expected) {
t.Fatalf("expected %d lines, got %d", len(expected), len(lines))
}
for i, exp := range expected {
if len(lines[i].Tokens) != len(exp) {
t.Errorf("line %d: expected %d tokens, got %d (tokens: %v)",
i, len(exp), len(lines[i].Tokens), lines[i].Tokens)
continue
}
for j, tok := range exp {
if lines[i].Tokens[j] != tok {
t.Errorf("line %d token %d: expected %q, got %q", i, j, tok, lines[i].Tokens[j])
}
}
}
}
func TestPreProcess_PragmaTracking(t *testing.T) {
files := map[string][]string{
"test.c65": {
@ -657,8 +791,8 @@ func TestPreProcess_Halt(t *testing.T) {
"LINE1",
"LINE2",
"#HALT",
"LINE3", // should not be emitted
"LINE4", // should not be emitted
"LINE3",
"LINE4",
},
}
reader := NewMockFileReader(files)
@ -672,7 +806,6 @@ func TestPreProcess_Halt(t *testing.T) {
t.Fatalf("expected HaltError, got %T: %v", err, err)
}
// Lines before HALT should still be processed
if len(lines) != 2 {
t.Errorf("expected 2 lines before halt, got %d", len(lines))
}
@ -691,12 +824,12 @@ func TestPreProcess_HaltInInclude(t *testing.T) {
"main.c65": {
"MAIN_LINE1",
"#INCLUDE lib.c65",
"MAIN_LINE2", // should not be reached
"MAIN_LINE2",
},
"lib.c65": {
"LIB_LINE1",
"#HALT",
"LIB_LINE2", // should not be emitted
"LIB_LINE2",
},
}
reader := NewMockFileReader(files)
@ -710,7 +843,6 @@ func TestPreProcess_HaltInInclude(t *testing.T) {
t.Fatalf("expected HaltError, got %T: %v", err, err)
}
// Should have MAIN_LINE1 and LIB_LINE1
if len(lines) != 2 {
t.Fatalf("expected 2 lines before halt, got %d", len(lines))
}
@ -723,3 +855,94 @@ func TestPreProcess_HaltInInclude(t *testing.T) {
t.Errorf("expected LIB_LINE1, got %q", lines[1].Text)
}
}
func TestPreProcess_MixedBlocksAndComments(t *testing.T) {
files := map[string][]string{
"test.c65": {
"#DEFINE X = 10",
"LDA #X // source comment",
"ASM",
" lda #X // asm comment",
"ENDASM",
"SCRIPT",
" y = X // script comment",
"ENDSCRIPT",
"STA $D020 // another source comment",
},
}
reader := NewMockFileReader(files)
lines, err := PreProcess("test.c65", reader)
if err != nil {
t.Fatalf("PreProcess failed: %v", err)
}
expected := []struct {
text string
kind LineKind
}{
{"LDA #10", Source},
{" lda #X // asm comment", Assembler},
{" y = X // script comment", Script},
{"STA $D020", Source},
}
if len(lines) != len(expected) {
t.Fatalf("expected %d lines, got %d", len(expected), len(lines))
}
for i, exp := range expected {
if lines[i].Text != exp.text {
t.Errorf("line %d: expected text %q, got %q", i, exp.text, lines[i].Text)
}
if lines[i].Kind != exp.kind {
t.Errorf("line %d: expected Kind=%v, got %v", i, exp.kind, lines[i].Kind)
}
}
}
// TestPreProcess_EmptyASMBlock checks that an ASM block with no body emits
// nothing, so only the source line following ENDASM appears in the output.
func TestPreProcess_EmptyASMBlock(t *testing.T) {
	reader := NewMockFileReader(map[string][]string{
		"test.c65": {
			"ASM",
			"ENDASM",
			"NOP",
		},
	})

	out, err := PreProcess("test.c65", reader)
	if err != nil {
		t.Fatalf("PreProcess failed: %v", err)
	}

	// Neither marker is emitted, leaving just NOP.
	if n := len(out); n != 1 {
		t.Fatalf("expected 1 line, got %d", n)
	}
	if text := out[0].Text; text != "NOP" {
		t.Errorf("expected 'NOP', got %q", text)
	}
}
// TestPreProcess_EmptyScriptBlock checks that a SCRIPT block with no body
// emits nothing, so only the source line following ENDSCRIPT appears in the
// output.
func TestPreProcess_EmptyScriptBlock(t *testing.T) {
	reader := NewMockFileReader(map[string][]string{
		"test.c65": {
			"SCRIPT",
			"ENDSCRIPT",
			"NOP",
		},
	})

	out, err := PreProcess("test.c65", reader)
	if err != nil {
		t.Fatalf("PreProcess failed: %v", err)
	}

	// Neither marker is emitted, leaving just NOP.
	if n := len(out); n != 1 {
		t.Fatalf("expected 1 line, got %d", n)
	}
	if text := out[0].Text; text != "NOP" {
		t.Errorf("expected 'NOP', got %q", text)
	}
}