-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSmetana2Infinity.py
executable file
·499 lines (445 loc) · 18.6 KB
/
Smetana2Infinity.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
#!/usr/bin/env python
# Smetana2Infinity.py
# An interpreter for the language "SMETANA To Infinity!"
# created by Tanner Swett, after SMETANA by Chris Pressey.
# http://esolangs.org/wiki/SMETANA_To_Infinity!
# https://catseye.tc/article/Automata.md#smetana
# Implemented by
# Anthony Kozar
# June 22-26 and July 4-5, 2019
import sys
import argparse
def printerr(message, line, linenum):
sys.stderr.write("Error: " + message + " on line " + linenum + ":\n")
sys.stderr.write(" " + line + '\n')
def parsererr(message, stepnum, tokens, tokidx):
sys.stderr.write("Error: " + message + " in step " + expr2str(stepnum) + ":\n")
# print up to 10 tokens for context
lastidx = min(tokidx + 10, len(tokens))
sys.stderr.write(" " + " ".join(map(str, tokens[tokidx:lastidx])) + '\n')
def warning(message):
sys.stderr.write("Warning: " + message + '\n')
# Token constants
DOT = '.'
PLUS = '+'
N = 'N'
STEP = 'Step'
STOP = 'Stop'
GO = 'Go'
TO = 'To'
SWAP = 'Swap'
WITH = 'With'
OUTPUT = 'Output'
CHAR = 'Character'
TEXT_TOKENS = [N, STEP, STOP, GO, TO, SWAP, WITH, OUTPUT, CHAR]
# Tokenize one line of the program listing.
# Returns a (possibly empty) list of tokens or False if an error occurs
def tokenize(line, linenum):
tokens = []
curtok = ""
# split line at whitespace and special characters
for c in line:
if c.isspace():
if curtok == "":
# discard extra whitespace and continue
continue
else:
# end of current token
tokens.append(curtok)
curtok = ""
elif c == '#':
if tokens == [] and curtok == "":
# comment line: ignore the rest of the line
return tokens
else:
printerr("Comment started after non-whitespace character", line, str(linenum))
return False
elif c == '.' or c == '+' or c == 'n' or c == 'N':
if curtok != "":
# end of current token
tokens.append(curtok)
tokens.append(c)
curtok = ""
else:
# add to current token
curtok += c
if curtok != "":
tokens.append(curtok)
# identify keywords and numbers
for i in xrange(len(tokens)):
if tokens[i].isdigit():
# convert number string to an integer
tokens[i] = int(tokens[i])
elif tokens[i] == DOT or tokens[i] == PLUS:
continue
else:
matched = False
# normalize the capitalization of keywords
tok = tokens[i].title()
for validtok in TEXT_TOKENS:
if tok == validtok:
tokens[i] = validtok
matched = True
break
if not matched:
# TODO? The specification says that
# "Whitespace is allowed before and after tokens".
# Do we need to allow for the possibility that
# there may not be any whitespace between keywords
# or keywords & numbers. (Eg. "Gotostep", "Step10")
printerr("Ilegal token '" + tokens[i] + "'", line, str(linenum))
return False
return tokens
# Program data structure
#
# A program is a tuple (numbered steps, expression steps) where the numbered steps
# are stored in a dictionary and the expression steps in a list. Each numbered
# step is an entry in the dictionary of the form
# step# -> (instruction code, arg1, arg2)
#
# A STOP instruction takes no arguments: (iSTOP, None, None)
# A GOTO instruction takes one argument: (iGOTO, step#, None)
# A SWAP instruction takes two arguments: (iSWAP, step#, step#)
# An OUTPUT instruction takes one argument: (iOUTP, number, None)
#
# Steps with expressions are stored in a list in the reverse order that they appear
# in the program listing. Each "n"-expression, an + b, is stored as a tuple (a, b).
# An expression step is a pair with the structure
# ( (a,b), (instruction code, arg1, arg2) )
# If an argument is a number then it is just an integer element of the inner tuple.
# If an argument is an expression then it is a pair (a,b) as above. Otherwise an
# argument is 'None'.
# Instruction codes
iSTOP = 0
iGOTO = 1
iSWAP = 2
iOUTP = 3
# Tuple indices
# for program
NUMDSTEPS = 0
EXPRSTEPS = 1
# for "n-"expression steps
NEXPR = 0
NINSTR = 1
# for instructions
ICODE = 0
ARG1 = 1
ARG2 = 2
# Parse an expression which can be just a number or an "n"-expression.
# Returns a pair (result, new idx) where result is an integer, a pair (a,b)
# representing an "n"-expression, or False if an error occurred. New idx is the
# index of the next token in tokens after the parsed expression.
def parse_expr(tokens, idx, curstep, stepidx):
leadingnum = None
haveN = False
addend = None
if type(tokens[idx]) is int:
leadingnum = tokens[idx]
idx += 1
if tokens[idx] == N:
haveN = True
idx += 1
if tokens[idx] == PLUS:
if type(tokens[idx+1]) is int:
addend = tokens[idx+1]
idx +=2
else:
parsererr("Illegal expression - expected a number after '+'", curstep, tokens, stepidx)
return (False, idx)
else:
addend = 0
elif leadingnum == None:
parsererr("Illegal expression - expected a number or 'n'", curstep, tokens, stepidx)
return (False, idx)
# have a valid expression, so return the result
if haveN:
if leadingnum is None: leadingnum = 1
return ((leadingnum, addend), idx)
else:
return (leadingnum, idx)
# parse() checks the syntax of the list of tokens and converts them to a "program"
# data structure for the interpreter. Returns the program or False if an error occurs.
def parse(tokens):
numberedsteps = {}
exprsteps = []
curstep = 0
curinstr = None
idx = 0
endidx = len(tokens)
while idx < endidx:
try:
# beginning of a step definition
stepidx = idx
if tokens[idx] == STEP:
idx += 1
# parse step number/expression
(expr,idx) = parse_expr(tokens, idx, curstep, stepidx)
if type(expr) is bool and not expr: return False
if type(expr) is int and expr == 0:
parsererr("0 is not a valid step number", 0, tokens, stepidx)
return False
curstep = expr
if tokens[idx] != DOT:
parsererr("Missing '.' after the step number", curstep, tokens, stepidx)
return False
idx += 1
else:
parsererr("Missing 'Step' at the beginning of a step definition", curstep, tokens, stepidx)
return False
# parse step instruction
# STOP instruction
if tokens[idx] == STOP:
idx += 1
curinstr = (iSTOP, None, None)
# GO TO instruction
elif tokens[idx] == GO:
if tokens[idx+1] != TO:
parsererr("Missing 'To' after 'Go'", curstep, tokens, stepidx)
return False
if tokens[idx+2] != STEP:
parsererr("Missing 'Step' after 'Go To'", curstep, tokens, stepidx)
return False
idx += 3
# parse target step number/expression
(expr,idx) = parse_expr(tokens, idx, curstep, stepidx)
if type(expr) is bool and not expr: return False
if type(expr) is int and expr == 0:
parsererr("0 is not a valid target step number", curstep, tokens, stepidx)
return False
if type(curstep) is int and not type(expr) is int:
parsererr("Illegal 'n'-expression in a numbered step", curstep, tokens, stepidx)
return False
curinstr = (iGOTO, expr, None)
# SWAP instruction
elif tokens[idx] == SWAP:
if tokens[idx+1] != STEP:
parsererr("Missing 'Step' after 'Swap'", curstep, tokens, stepidx)
return False
idx += 2
# parse first target step number/expression
(arg1,idx) = parse_expr(tokens, idx, curstep, stepidx)
if type(arg1) is bool and not arg1: return False
if type(arg1) is int and arg1 == 0:
parsererr("0 is not a valid target step number", curstep, tokens, stepidx)
return False
if type(curstep) is int and not type(arg1) is int:
parsererr("Illegal 'n'-expression in a numbered step", curstep, tokens, stepidx)
return False
# check for "with step"
if tokens[idx] != WITH:
parsererr("Missing 'With' after first swap target", curstep, tokens, stepidx)
return False
if tokens[idx+1] != STEP:
parsererr("Missing 'Step' after 'With'", curstep, tokens, stepidx)
return False
idx += 2
# parse second target step number/expression
(arg2,idx) = parse_expr(tokens, idx, curstep, stepidx)
if type(arg2) is bool and not arg2: return False
if type(arg2) is int and arg2 == 0:
parsererr("0 is not a valid target step number", curstep, tokens, stepidx)
return False
if type(curstep) is int and not type(arg2) is int:
parsererr("Illegal 'n'-expression in a numbered step", curstep, tokens, stepidx)
return False
curinstr = (iSWAP, arg1, arg2)
# OUTPUT instruction
elif tokens[idx] == OUTPUT:
if tokens[idx+1] != CHAR:
parsererr("Missing 'Character' after 'Output'", curstep, tokens, stepidx)
return False
idx += 2
# parse number/expression to output
(expr,idx) = parse_expr(tokens, idx, curstep, stepidx)
if type(expr) is bool and not expr: return False
if type(curstep) is int and not type(expr) is int:
parsererr("Illegal 'n'-expression in a numbered step", curstep, tokens, stepidx)
return False
curinstr = (iOUTP, expr, None)
else:
parsererr("Illegal instruction '" + str(tokens[idx]) + "'", curstep, tokens, stepidx)
return False
# check for end of step
if tokens[idx] != DOT:
parsererr("Missing '.' at end of instruction", curstep, tokens, stepidx)
return False
idx += 1
# add step to program
if type(curstep) is int:
numberedsteps[curstep] = curinstr
else:
exprsteps.append( (curstep, curinstr) )
# If this expression can match any previously defined steps, remove them.
# (We could remove matching expression steps too, but leaving them does
# not affect the correct execution of the program).
for stepnum in numberedsteps.keys():
if matchNexpression(stepnum, curstep):
del numberedsteps[stepnum]
warning("Step " + nexpr2str(curstep) + " has replaced step " + str(stepnum))
except IndexError:
parsererr("Unexpected end of program OR internal parser error", curstep, tokens, stepidx)
return False
# reverse the order of the steps with expressions so that the last matching expression has precedence
exprsteps.reverse()
return (numberedsteps, exprsteps)
# Converts an "n"-expression tuple (a, b) into the string "an + b".
def nexpr2str(nexpr):
if nexpr[1] == 0 and nexpr[0] == 1: return "n"
elif nexpr[1] == 0: return "%dn" % nexpr[0]
elif nexpr[0] == 1: return "n + %d" % nexpr[1]
else: return "%dn + %d" % nexpr
# Convert any expression to a string.
def expr2str(expr):
if isNexpr(expr):
return nexpr2str(expr)
else:
return str(expr)
# Returns true if stepnum matches the "n"-expression an + b for some positive n.
def matchNexpression(stepnum, nexpr):
# check whether an integer n exists that satisfies the equation an + b = stepnum
an = stepnum - nexpr[1]
(n, rem) = divmod(an, nexpr[0])
return rem == 0 and n > 0
# Returns true if arg is a pair (a, b) representing an "n"-expression
def isNexpr(arg):
return type(arg) is tuple and len(arg) == 2
# Searches a program for a step matching stepnum. Returns a pair (stepnum, instruction)
# if a match is found. Returns None if no match found.
def findstep(stepnum, program):
# look first for stepnum among the numbered steps
if program[NUMDSTEPS].has_key(stepnum):
return (stepnum, program[NUMDSTEPS][stepnum])
else:
# search for a matching expression step
for step in program[EXPRSTEPS]:
if matchNexpression(stepnum, step[NEXPR]):
return step
# no match found
return None
# Evaluate any expressions in the instruction portion of an expression step
# and return the new instruction tuple. Assumes that stepexpr has already
# been checked to match curstepnum.
def evalexprstep(curstepnum, stepexpr, instr):
# solve the equation an + b = curstepnum for n
an = curstepnum - stepexpr[1]
n = an // stepexpr[0]
# evaluate the arguments of instr
if isNexpr(instr[ARG1]):
arg1 = instr[ARG1][0] * n + instr[ARG1][1]
else:
arg1 = instr[ARG1]
if isNexpr(instr[ARG2]):
arg2 = instr[ARG2][0] * n + instr[ARG2][1]
else:
arg2 = instr[ARG2]
return (instr[ICODE], arg1, arg2)
# Takes the result of findstep() as input 'step' plus the current step number
# and makes sure that it is ready to be executed or swapped.
def preparestep(step, curstep):
if step:
(stepnum, instr) = step
if isNexpr(stepnum):
instr = evalexprstep(curstep, stepnum, instr)
else:
# no step was found, so use the default STOP instruction
(stepnum, instr) = (curstep, (iSTOP, None, None))
# NOTE: stepnum may be an "n"-expression
return (stepnum, instr)
# Output formats
outINTEGER = 1
outASCII = 2
outUNICODE = 3
def execute(program, outformat = outINTEGER, startstep = 1, trace = False):
steps = program[NUMDSTEPS] # dictionary of numbered steps
# exprsteps = program[EXPRSTEPS] # list of expression steps
curstep = startstep
done = False
while not done:
step = findstep(curstep, program)
(stepnum, instr) = preparestep(step, curstep)
if trace:
print STEP, curstep,
printinstruction(instr)
# execute the current step
instrcode = instr[ICODE]
if instr[ICODE] == iSTOP:
done = True
elif instr[ICODE] == iGOTO:
curstep = instr[ARG1]
elif instr[ICODE] == iSWAP:
# get the target steps
target1 = instr[ARG1]
target2 = instr[ARG2]
(stepnum1, instr1) = preparestep(findstep(target1, program), target1)
(stepnum2, instr2) = preparestep(findstep(target2, program), target2)
# swap them in the program's numbered steps
steps[target1] = instr2
steps[target2] = instr1
curstep += 1
elif instr[ICODE] == iOUTP:
if outformat == outINTEGER:
print instr[ARG1]
elif outformat == outASCII:
if instr[ARG1] >= 0 and instr[ARG1] <= 127:
sys.stdout.write(chr(instr[ARG1]))
else:
warning("ASCII char value %d out of range." % instr[ARG1])
elif outformat == outUNICODE:
if instr[ARG1] >= 0 and instr[ARG1] <= 65534:
sys.stdout.write(unichr(instr[ARG1]))
else:
warning("Unicode char value %d out of range." % instr[ARG1])
curstep += 1
return curstep
# Expects a tuple of the form (instruction code, arg1, arg2)
def printinstruction(instr):
if instr[ICODE] == iSTOP:
print STOP
elif instr[ICODE] == iGOTO:
print GO, TO, instr[ARG1]
elif instr[ICODE] == iSWAP:
print SWAP, instr[ARG1], instr[ARG2]
elif instr[ICODE] == iOUTP:
print OUTPUT, instr[ARG1]
# MAIN program
# Exit constants
EXIT_OK = 0
EXIT_TOKEN_ERR = 3
EXIT_PARSE_ERR = 4
# set up commandline argument handling
parser = argparse.ArgumentParser(description="An interpreter for the language SMETANA To Infinity!")
parser.add_argument("program", help="filename of the STI program to run")
parser.add_argument("-s", "--start", help="begin execution with step N", type=int, default=1, metavar='N')
parser.add_argument("-t", "--trace", help="trace program execution", action="store_true")
outgrp = parser.add_argument_group("output options", "Set the behavior of the 'Output character' statement.")
outgrp.add_argument("-a", "--ascii", help="output ASCII characters", dest='outformat', action='store_const', const=outASCII, default=outINTEGER)
outgrp.add_argument("-i", "--integers", help="output integers (default)", dest='outformat', action='store_const', const=outINTEGER, default=outINTEGER)
outgrp.add_argument("-u", "--unicode", help="output Unicode characters", dest='outformat', action='store_const', const=outUNICODE, default=outINTEGER)
args = parser.parse_args()
# read program file and tokenize it one line at a time
f = open(args.program)
try:
linenum = 1
alltokens = []
for line in f:
tokens = tokenize(line, linenum)
if not tokens and type(tokens) is bool:
sys.exit(EXIT_TOKEN_ERR)
# print tokens
# The spec does not restrict where newlines can appear so we allow
# steps to cross line boundaries and multiple steps per line.
# (This contrasts with the SMETANA grammar which requires a newline
# after each step).
alltokens += tokens
linenum += 1
finally:
f.close()
# parse tokens and "compile" program
program = parse(alltokens)
if not program and type(program) is bool:
sys.stderr.flush()
sys.exit(EXIT_PARSE_ERR)
# print program
# run the program with the specified options
execute(program, args.outformat, args.start, args.trace)
sys.exit(EXIT_OK)