import sys

# Toy simulation of the cmd.exe command interpreter from Windows XP
# Unlike Unix, Windows passes an entire commandline,
# spaces and all, to the app being started; thus cmd
# tends to not do as much quote processing as Unix shells.

# Useful references for cmd:
# http://ss64.com/nt/syntax.html
# http://en.wikibooks.org/wiki/Windows_Programming/Programming_CMD

# Useful trivia about cmd:
# http://blogs.msdn.com/oldnewthing/archive/2005/01/28/362565.aspx
# http://blogs.msdn.com/oldnewthing/archive/2005/09/09/462906.aspx
# http://blogs.msdn.com/oldnewthing/archive/2006/05/04/589884.aspx
# http://blogs.msdn.com/oldnewthing/archive/2006/05/17/599916.aspx *** useful
# http://blogs.msdn.com/oldnewthing/archive/2006/08/23/714650.aspx
# http://blogs.msdn.com/oldnewthing/archive/2007/11/21/6447771.aspx
# http://blogs.msdn.com/oldnewthing/archive/2008/04/17/8399914.aspx
# http://blogs.msdn.com/oldnewthing/archive/2008/09/26/8965755.aspx

#--------------------------- the stages of parsing in cmd ---------------------------

# The cmd language, like the sh language, has to be parsed in
# several stages.  The stages aren't documented anywhere, but
# http://blogs.msdn.com/oldnewthing/archive/2006/05/17/599916.aspx
# gives an idea how to tell what they are, and lists the first few:
# 1. paste together lines continued by &
# 2. ...
# 3. Profit!

# So, we'll define one function for each pass; each will take in
# a string, and return the processed string; if it needs more
# input, it will return "(more)" for now.
def phase1_join_lines(buf):
    """Phase 1 of parsing: decide whether the command line is complete.

    Args:
        buf: The command text accumulated so far.

    Returns:
        The sentinel string "(more)" when buf ends with '&' (the line is
        continued and more input is needed); otherwise buf unchanged.
    """
    # endswith() is safe on an empty buffer, where buf[-1] would raise
    # IndexError (e.g. when the user just presses Enter).
    if buf.endswith('&'):
        return "(more)"
    return buf


#--------------------------- yacc parser for internal commands ---------------------------

# Reserved words of the cmd language
reserved = {
    'echo': 'ECHO',
    'help': 'HELP',
    'goto': 'GOTO',
    'rem': 'REM',
    'set': 'SET',
}

tokens = [
    'AMP',
    'LABEL',
    'STRING',
] + list(reserved.values())

t_ignore = " \t"

t_AMP = r'\&'


# NOTE: the docstring of every t_* / p_* function below is consumed by PLY
# as the token regex / grammar production, so these functions are documented
# with comments rather than docstrings.

# two colons is a synonym for REM
def t_REM(t):
    r'\:\:'
    return t


# LABEL must be a function rule defined before t_STRING: PLY tries function
# rules (in definition order) before string rules, so as a plain string rule
# it was always shadowed by t_STRING and labels were never recognised.
def t_LABEL(t):
    r':[a-zA-Z_][a-zA-Z0-9_]*'
    return t


def t_STRING(t):
    r'[^\s&]+'
    t.type = reserved.get(t.value, 'STRING')  # Check for reserved words
    return t


# Newlines produce no token; they only advance the lexer's line counter.
def t_newline(t):
    r'\n+'
    t.lexer.lineno += t.value.count("\n")


# Report a character no rule matches, then skip it and keep lexing.
def t_error(t):
    print("Illegal character '%s'" % t.value[0])
    t.lexer.skip(1)


# Build the lexer
import ply.lex as lex
lex.lex()


def p_start(p):
    'statements : statement'


def p_statement_compound_next(p):
    'statements : statements AMP statement'


def p_statement_echo(p):
    'statement : ECHO STRING'
    print(p[2])


def p_statement_help(p):
    'statement : HELP'
    print("there is no help")


def p_statement_goto(p):
    'statement : GOTO LABEL'
    print("Going to " + p[2])


def p_statement_set(p):
    'statement : SET STRING'
    print("Munging %s" % p[2])


def p_statement_label(p):
    'statement : LABEL'
    print("defined label %s" % p[1])
    print("at line %d" % p.lineno(1))


def p_statement_rem(p):
    'statement : REM STRING'
    print("remark %s" % p[2])


# Report a syntax error.  PLY passes None instead of a token when the error
# is an unexpected end of input, so p.value cannot be read unconditionally.
def p_error(p):
    if p is None:
        print("Syntax error at end of input")
    else:
        print("Syntax error at '%s'" % p.value)


import ply.yacc as yacc
yacc.yacc()

# Pick the line-reading builtin that returns the raw text on this
# interpreter: raw_input on Python 2, input on Python 3.
try:
    read_line = raw_input
except NameError:
    read_line = input

# Read-eval-print loop: gather one complete command (following '&'
# continuations), dump its tokens, then parse it.  Ends at end of input.
at_eof = False
while not at_eof:
    cmd = ""
    s = "(more)"
    prompt = 'cmd > '
    while s == "(more)":
        try:
            line = read_line(prompt)
        except EOFError:
            # Stop the whole session; otherwise the outer loop would spin
            # forever re-reading an exhausted stdin.
            at_eof = True
            break
        cmd = cmd + line
        s = phase1_join_lines(cmd)
        prompt = "(more) "
    if at_eof:
        break
    if not s.strip():
        # Nothing to run on a blank line; avoid a spurious syntax error.
        continue

    # Just for testing: show tokens from one line of input
    lex.input(s)
    while True:
        tok = lex.token()
        if not tok:
            break
        print(tok)

    # ok, now parse
    yacc.parse(s)