Package rivescript :: Module rivescript
[hide private]
[frames | no frames]

Source Code for Module rivescript.rivescript

   1  #!/usr/bin/env python 
   2   
   3  # The MIT License (MIT) 
   4  # 
   5  # Copyright (c) 2015 Noah Petherbridge 
   6  # 
   7  # Permission is hereby granted, free of charge, to any person obtaining a copy 
   8  # of this software and associated documentation files (the "Software"), to deal 
   9  # in the Software without restriction, including without limitation the rights 
  10  # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 
  11  # copies of the Software, and to permit persons to whom the Software is 
  12  # furnished to do so, subject to the following conditions: 
  13  # 
  14  # The above copyright notice and this permission notice shall be included in all 
  15  # copies or substantial portions of the Software. 
  16  # 
  17  # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
  18  # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
  19  # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 
  20  # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
  21  # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 
  22  # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 
  23  # SOFTWARE. 
  24   
  25  from __future__ import unicode_literals 
  26  from six import text_type 
  27  import sys 
  28  import os 
  29  import re 
  30  import string 
  31  import random 
  32  import pprint 
  33  import copy 
  34  import codecs 
  35   
  36  from . import __version__ 
  37  from . import python 
# Common regular expressions.
class RE(object):
    """Namespace of regular expressions, compiled once at import time.

    Every pattern is written as a raw string so the regex escapes reach the
    `re` module verbatim instead of first passing through Python's own
    string-escape processing (which warns about unknown escapes).
    """
    equals = re.compile(r'\s*=\s*')
    ws = re.compile(r'\s+')
    objend = re.compile(r'^\s*<\s*object')
    weight = re.compile(r'\{weight=(\d+)\}')
    inherit = re.compile(r'\{inherits=(\d+)\}')
    wilds = re.compile(r'[\s\*\#\_]+')
    nasties = re.compile(r'[^A-Za-z0-9 ]')
    crlf = re.compile(r'<crlf>')
    literal_w = re.compile(r'\\w')
    array = re.compile(r'\@(.+?)\b')
    def_syntax = re.compile(r'^.+(?:\s+.+|)\s*=\s*.+?$')
    name_syntax = re.compile(r'[^a-z0-9_\-\s]')
    utf8_trig = re.compile(r'[A-Z\\.]')
    trig_syntax = re.compile(r'[^a-z0-9(\|)\[\]*_#@{}<>=\s]')
    cond_syntax = re.compile(r'^.+?\s*(?:==|eq|!=|ne|<>|<|<=|>|>=)\s*.+?=>.+?$')
    utf8_meta = re.compile(r'[\\<>]')
    utf8_punct = re.compile(r'[.?,!;:@#$%^&*()]')
    cond_split = re.compile(r'\s*=>\s*')
    cond_parse = re.compile(r'^(.+?)\s+(==|eq|!=|ne|<>|<|<=|>|>=)\s+(.+?)$')
    topic_tag = re.compile(r'\{topic=(.+?)\}')
    set_tag = re.compile(r'<set (.+?)=(.+?)>')
    bot_tag = re.compile(r'<bot (.+?)>')
    get_tag = re.compile(r'<get (.+?)>')
    star_tags = re.compile(r'<star(\d+)>')
    botstars = re.compile(r'<botstar(\d+)>')
    input_tags = re.compile(r'<input([1-9])>')
    reply_tags = re.compile(r'<reply([1-9])>')
    random_tags = re.compile(r'\{random\}(.+?)\{/random\}')
    redir_tag = re.compile(r'\{@(.+?)\}')
    tag_search = re.compile(r'<([^<]+?)>')
    placeholder = re.compile(r'\x00(\d+)\x00')
    zero_star = re.compile(r'^\*$')
    optionals = re.compile(r'\[(.+?)\]')
# Version of RiveScript we support.  A "! version" definition is compared
# against this number while parsing; documents declaring a higher version are
# not loaded.
rs_version = 2.0

# Exportable constants: the bracketed "[ERR: ...]" marker strings of this
# module.  NOTE(review): presumably these are returned in place of a reply
# when the named failure occurs -- the code that uses them is not visible
# here, confirm against the reply methods.
RS_ERR_MATCH = "[ERR: No reply matched]"
RS_ERR_REPLY = "[ERR: No reply found]"
RS_ERR_DEEP_RECURSION = "[ERR: Deep recursion detected]"
RS_ERR_OBJECT = "[ERR: Error when executing Python object]"
RS_ERR_OBJECT_HANDLER = "[ERR: No Object Handler]"
RS_ERR_OBJECT_MISSING = "[ERR: Object Not Found]"
85 86 87 -class RiveScript(object):
88 """A RiveScript interpreter for Python 2 and 3.""" 89 90 # Concatenation mode characters. 91 _concat_modes = dict( 92 none="", 93 space=" ", 94 newline="\n", 95 ) 96 97 ############################################################################ 98 # Initialization and Utility Methods # 99 ############################################################################ 100
def __init__(self, debug=False, strict=True, depth=50, log="", utf8=False):
    """Set up an empty RiveScript interpreter.

    bool debug:  Turn on debug output.
    bool strict: Treat RiveScript syntax errors as fatal.
    int depth:   Recursion depth limit.
    str log:     File that debug output is logged to.
    bool utf8:   Enable UTF-8 support."""
    # Options handed in by the caller.
    self._debug, self._log = debug, log    # Debug mode / debug log file
    self._utf8, self._strict = utf8, strict  # UTF-8 mode / strict mode
    self._depth = depth                    # Recursion depth limit

    # Definitions loaded from "!" lines.
    self._gvars = {}   # 'global' variables
    self._bvars = {}   # 'bot' variables
    self._subs = {}    # 'sub' variables
    self._person = {}  # 'person' variables
    self._arrays = {}  # 'array' variables

    # Per-user state.
    self._users = {}   # 'user' variables
    self._freeze = {}  # frozen 'user' variables
    self._current_user = None  # ID of the user for the current request

    # Topic relationships and the reply structures themselves.
    self._includes = {}  # included topics
    self._lineage = {}   # inherited topics
    self._topics = {}    # Main reply structure
    self._thats = {}     # %Previous reply structure
    self._sorted = {}    # Sorted buffers
    self._syntax = {}    # Syntax tracking (filenames & line no.'s)

    # Precomputed regexes for speed optimizations, grouped by kind.
    self._regexc = dict((kind, {}) for kind in ("trigger", "subs", "person"))

    # Object macro support; Python is the only handler defined by default.
    self._objlangs = {}  # Languages of objects used
    self._handlers = {"python": python.PyRiveObjects()}

    self._say("Interpreter initialized.")
@classmethod
def VERSION(self=None):
    """Return the version number of the RiveScript library.

    This may be called as either a class method or a method of a RiveScript
    object: because of the `classmethod` decorator the first parameter
    (named `self` here) always receives the class, so no instance state is
    needed.

    Returns the package's `__version__` value."""
    return __version__
150
def _say(self, message):
    """Emit a debug message.

    str message: The text to report; it is prefixed with "[RS] ".

    The line is printed to standard output when debug mode is on, and
    appended to the log file when one was configured."""
    if self._debug:
        print("[RS] {}".format(message))
    if self._log:
        # Log it to the file.  The handle is opened as UTF-8 (the same
        # encoding load_file() reads) so non-ASCII text can be logged on
        # Python 2 as well, and the `with` block guarantees it is closed
        # even when the write fails.
        with codecs.open(self._log, 'a', 'utf-8') as fh:
            fh.write("[RS] " + message + "\n")
159
def _warn(self, message, fname='', lineno=0):
    """Print a warning to standard output.

    str message: The warning text.
    str fname:   Source file the warning refers to (optional).
    int lineno:  Line number within `fname` (optional).

    The header is "[RS::Warning]" in debug mode and "[RS]" otherwise. The
    " at <fname> line <lineno>" suffix is added only when both a file name
    and a positive line number are given."""
    header = "[RS::Warning]" if self._debug else "[RS]"

    # Build one string and print that: this module does not import
    # print_function, so a multi-argument print(...) would display a tuple
    # on Python 2.
    if len(fname) and lineno > 0:
        text = "{} {} at {} line {}".format(header, message, fname, lineno)
    else:
        text = "{} {}".format(header, message)
    print(text)
168 169 ############################################################################ 170 # Loading and Parsing Methods # 171 ############################################################################ 172
def load_directory(self, directory, ext=None):
    """Load RiveScript documents from a directory.

    str directory: Path of the directory to scan (not recursive).
    list ext:      Extensions to search for. The default list is `.rive`,
                   `.rs`. A single string is accepted for backwards
                   compatibility.

    Matching is done against the lower-cased file name. Files are loaded in
    sorted name order so the result does not depend on the order the
    operating system lists them in. A warning is printed, and nothing is
    loaded, when `directory` is not a directory."""
    self._say("Loading from directory: " + directory)

    if ext is None:
        # Use the default extensions - .rive is preferable.
        ext = ['.rive', '.rs']
    elif isinstance(ext, (str, text_type)):
        # Backwards compatibility for ext being a string value.  Unicode
        # strings must be caught too, otherwise the loop below would treat
        # every character as an extension.
        ext = [ext]

    if not os.path.isdir(directory):
        self._warn("Error: " + directory + " is not a directory.")
        return

    # str.endswith accepts a tuple of candidate suffixes.
    suffixes = tuple(ext)
    for item in sorted(os.listdir(directory)):
        if item.lower().endswith(suffixes):
            # Load this file.
            self.load_file(os.path.join(directory, item))
197
def load_file(self, filename):
    """Load and parse a RiveScript document.

    str filename: Path of the file to read; it is decoded as UTF-8.

    The file's lines are handed to `_parse()` together with the file name."""
    self._say("Loading file: " + filename)

    # The context manager closes the handle even if reading or decoding
    # the file raises.
    with codecs.open(filename, 'r', 'utf-8') as fh:
        lines = fh.readlines()

    self._say("Parsing " + str(len(lines)) + " lines of code from " + filename)
    self._parse(filename, lines)
208
def stream(self, code):
    """Feed RiveScript source code to the parser at run time.

    `code` is either one string of RiveScript code (it is split on newline
    characters) or an already-split array of lines."""
    self._say("Streaming code.")
    is_text = type(code) in [str, text_type]
    lines = code.split("\n") if is_text else code
    self._parse("stream()", lines)
218
def _parse(self, fname, code):
    """Parse RiveScript code into memory.

    str fname: Label of the source being parsed (a file name, or "stream()");
               used in warnings, syntax errors and syntax tracking.
    list code: The lines of RiveScript source.

    The parsed data is stored on the interpreter: definitions, topics,
    %Previous triggers, topic includes/inherits, object macros (handed to
    the matching language handler) and per-trigger syntax tracking.

    Parsing of this document stops early when a syntax error is found
    (raising `Exception` in strict mode, warning otherwise) or when the
    document declares a RiveScript version newer than `rs_version`."""
    self._say("Parsing code")

    # Track temporary variables.
    topic = 'random'  # Default topic=random
    lineno = 0        # Line numbers for syntax tracking
    comment = False   # In a multi-line comment
    inobj = False     # In an object
    objname = ''      # The name of the object we're in
    objlang = ''      # The programming language of the object
    objbuf = []       # Object contents buffer
    ontrig = ''       # The current trigger
    repcnt = 0        # Reply counter
    concnt = 0        # Condition counter
    isThat = ''       # Is a %Previous trigger

    # Local (file scoped) parser options.
    local_options = dict(
        concat="none",  # Concat mode for ^Continue command
    )

    # Read each line.
    for lp, line in enumerate(code):
        lineno += 1

        # NOTE(review): the label says "incomment" but the value shown is
        # the in-object flag; the message text is kept as it was.
        self._say("Line: " + line + " (topic: " + topic + ") incomment: " + str(inobj))
        if len(line.strip()) == 0:  # Skip blank lines
            continue

        # In an object?
        if inobj:
            if RE.objend.match(line):
                # End the object.
                if len(objname):
                    # Call the object's handler.
                    if objlang in self._handlers:
                        self._objlangs[objname] = objlang
                        self._handlers[objlang].load(objname, objbuf)
                    else:
                        self._warn("Object creation failed: no handler for " + objlang, fname, lineno)
                objname = ''
                objlang = ''
                objbuf = []
                inobj = False
            else:
                objbuf.append(line)
            continue

        line = line.strip()  # Trim excess space. We do it down here so we
                             # don't mess up python objects!

        # Look for comments.
        if line[:2] == '//':  # A single-line comment.
            continue
        elif line[0] == '#':
            self._warn("Using the # symbol for comments is deprecated", fname, lineno)
        elif line[:2] == '/*':  # Start of a multi-line comment.
            if '*/' not in line:  # Cancel if the end is here too.
                comment = True
            continue
        elif '*/' in line:
            comment = False
            continue
        if comment:
            continue

        # Separate the command from the data.
        if len(line) < 2:
            self._warn("Weird single-character line '" + line + "' found.", fname, lineno)
            continue
        cmd = line[0]
        line = line[1:].strip()

        # Ignore inline comments if there's a space before and after
        # the // symbols.
        if " // " in line:
            line = line.split(" // ")[0].strip()

        # Run a syntax check on this line.
        syntax_error = self.check_syntax(cmd, line)
        if syntax_error:
            # There was a syntax error! Are we enforcing strict mode?
            syntax_error = "Syntax error in " + fname + " line " + str(lineno) + ": " \
                + syntax_error + " (near: " + cmd + " " + line + ")"
            if self._strict:
                raise Exception(syntax_error)
            else:
                self._warn(syntax_error)
                return  # Don't try to continue

        # Reset the %Previous state if this is a new +Trigger.
        if cmd == '+':
            isThat = ''

        # Do a lookahead for ^Continue and %Previous commands.
        for i in range(lp + 1, len(code)):
            lookahead = code[i].strip()
            if len(lookahead) < 2:
                continue
            lookCmd = lookahead[0]
            lookahead = lookahead[1:].strip()

            # Only continue if the lookahead line has any data.
            if len(lookahead) != 0:
                # The lookahead command has to be either a % or a ^.
                if lookCmd != '^' and lookCmd != '%':
                    break

                # If the current command is a +, see if the following is
                # a %.
                if cmd == '+':
                    if lookCmd == '%':
                        isThat = lookahead
                        break
                    else:
                        isThat = ''

                # If the current command is a ! and the next command(s) are
                # ^, we'll tack each extension on as a line break (which is
                # useful information for arrays).
                if cmd == '!':
                    if lookCmd == '^':
                        line += "<crlf>" + lookahead
                    continue

                # If the current command is not a ^ and the line after is
                # not a %, but the line after IS a ^, then tack it on to the
                # end of the current line.
                if cmd != '^' and lookCmd != '%':
                    if lookCmd == '^':
                        line += self._concat_modes.get(
                            local_options["concat"], ""
                        ) + lookahead
                    else:
                        break

        self._say("Command: " + cmd + "; line: " + line)

        # Handle the types of RiveScript commands.
        if cmd == '!':
            # ! DEFINE
            # Split on the FIRST equals sign only, so that values which
            # themselves contain "=" stay in one piece.
            halves = RE.equals.split(line, maxsplit=1)
            left = RE.ws.split(halves[0].strip(), maxsplit=2)
            value, kind, var = '', '', ''
            if len(halves) == 2:
                value = halves[1].strip()
            if len(left) >= 1:
                kind = left[0].strip()
            if len(left) >= 2:
                var = ' '.join(left[1:]).strip()

            # Remove 'fake' line breaks unless this is an array.
            if kind != 'array':
                value = RE.crlf.sub('', value)

            # Handle version numbers.
            if kind == 'version':
                # Verify we support it.
                try:
                    declared = float(value)
                except ValueError:
                    self._warn("Error parsing RiveScript version number: not a number", fname, lineno)
                    continue
                if declared > rs_version:
                    # rs_version is a float, so it has to be converted
                    # before it can be joined to the message.
                    self._warn("Unsupported RiveScript version. We only support " + str(rs_version), fname, lineno)
                    return
                continue

            # All other types of defines require a variable and value name.
            if len(var) == 0:
                self._warn("Undefined variable name", fname, lineno)
                continue
            elif len(value) == 0:
                self._warn("Undefined variable value", fname, lineno)
                continue

            # Handle the rest of the types.
            if kind == 'local':
                # Local file-scoped parser options.
                self._say("\tSet parser option " + var + " = " + value)
                local_options[var] = value
            elif kind == 'global':
                # 'Global' variables
                self._say("\tSet global " + var + " = " + value)

                if value == '<undef>':
                    try:
                        del self._gvars[var]
                    except KeyError:
                        self._warn("Failed to delete missing global variable", fname, lineno)
                else:
                    self._gvars[var] = value

                # Handle flipping debug and depth vars.
                if var == 'debug':
                    self._debug = value.lower() == 'true'
                elif var == 'depth':
                    try:
                        self._depth = int(value)
                    except ValueError:
                        self._warn("Failed to set 'depth' because the value isn't a number!", fname, lineno)
                elif var == 'strict':
                    self._strict = value.lower() == 'true'
            elif kind == 'var':
                # Bot variables
                self._say("\tSet bot variable " + var + " = " + value)

                if value == '<undef>':
                    try:
                        del self._bvars[var]
                    except KeyError:
                        self._warn("Failed to delete missing bot variable", fname, lineno)
                else:
                    self._bvars[var] = value
            elif kind == 'array':
                # Arrays
                self._say("\tArray " + var + " = " + value)

                if value == '<undef>':
                    try:
                        del self._arrays[var]
                    except KeyError:
                        self._warn("Failed to delete missing array", fname, lineno)
                    continue

                # Did this have multiple parts?
                parts = value.split("<crlf>")

                # Process each line of array data.
                fields = []
                for val in parts:
                    if '|' in val:
                        fields.extend(val.split('|'))
                    else:
                        fields.extend(RE.ws.split(val))

                # Convert any remaining '\s' escape codes into spaces.  A new
                # list is built because rebinding the loop variable would
                # leave the stored items untouched.
                fields = [f.replace('\\s', ' ') for f in fields]

                self._arrays[var] = fields
            elif kind == 'sub':
                # Substitutions
                self._say("\tSubstitution " + var + " => " + value)

                if value == '<undef>':
                    try:
                        del self._subs[var]
                    except KeyError:
                        self._warn("Failed to delete missing substitution", fname, lineno)
                else:
                    self._subs[var] = value

                    # Precompile the regexp.
                    self._precompile_substitution("subs", var)
            elif kind == 'person':
                # Person Substitutions
                self._say("\tPerson Substitution " + var + " => " + value)

                if value == '<undef>':
                    try:
                        del self._person[var]
                    except KeyError:
                        self._warn("Failed to delete missing person substitution", fname, lineno)
                else:
                    self._person[var] = value

                    # Precompile the regexp.
                    self._precompile_substitution("person", var)
            else:
                self._warn("Unknown definition type '" + kind + "'", fname, lineno)
        elif cmd == '>':
            # > LABEL
            temp = RE.ws.split(line)
            label = temp[0]
            name = ''
            fields = []
            if len(temp) >= 2:
                name = temp[1]
            if len(temp) >= 3:
                fields = temp[2:]

            # Handle the label types.
            if label == 'begin':
                # The BEGIN block is stored as a topic with a reserved name.
                self._say("\tFound the BEGIN block.")
                label = 'topic'
                name = '__begin__'
            if label == 'topic':
                # Starting a new topic.
                self._say("\tSet topic to " + name)
                ontrig = ''
                topic = name

                # Does this topic include or inherit another one?
                mode = ''  # or 'inherits' or 'includes'
                if len(fields) >= 2:
                    for field in fields:
                        if field == 'includes':
                            mode = 'includes'
                        elif field == 'inherits':
                            mode = 'inherits'
                        elif mode != '':
                            # This topic is either inherited or included.
                            if mode == 'includes':
                                if name not in self._includes:
                                    self._includes[name] = {}
                                self._includes[name][field] = 1
                            else:
                                if name not in self._lineage:
                                    self._lineage[name] = {}
                                self._lineage[name][field] = 1
            elif label == 'object':
                # If a field was provided, it should be the programming
                # language.
                lang = None
                if len(fields) > 0:
                    lang = fields[0].lower()

                # Only try to parse a language we support.
                ontrig = ''
                if lang is None:
                    self._warn("Trying to parse unknown programming language", fname, lineno)
                    lang = 'python'  # Assume it's Python.

                # See if we have a defined handler for this language.
                if lang in self._handlers:
                    # We have a handler, so start loading the code.
                    objname = name
                    objlang = lang
                    objbuf = []
                    inobj = True
                else:
                    # We don't have a handler, just ignore it.  The empty
                    # object name makes the "< object" line discard the
                    # buffered code.
                    objname = ''
                    objlang = ''
                    objbuf = []
                    inobj = True
            else:
                self._warn("Unknown label type '" + label + "'", fname, lineno)
        elif cmd == '<':
            # < LABEL
            label = line

            if label == 'begin' or label == 'topic':
                self._say("\tEnd topic label.")
                topic = 'random'
            elif label == 'object':
                self._say("\tEnd object label.")
                inobj = False
        elif cmd == '+':
            # + TRIGGER
            self._say("\tTrigger pattern: " + line)
            if len(isThat):
                self._initTT('thats', topic, isThat, line)
                self._initTT('syntax', topic, line, 'thats')
                self._syntax['thats'][topic][line]['trigger'] = (fname, lineno)
            else:
                self._initTT('topics', topic, line)
                self._initTT('syntax', topic, line, 'topic')
                self._syntax['topic'][topic][line]['trigger'] = (fname, lineno)
            ontrig = line
            repcnt = 0
            concnt = 0

            # Pre-compile the trigger's regexp if possible.
            self._precompile_regexp(ontrig)
        elif cmd == '-':
            # - REPLY
            if ontrig == '':
                self._warn("Response found before trigger", fname, lineno)
                continue
            self._say("\tResponse: " + line)
            if len(isThat):
                self._thats[topic][isThat][ontrig]['reply'][repcnt] = line
                self._syntax['thats'][topic][ontrig]['reply'][repcnt] = (fname, lineno)
            else:
                self._topics[topic][ontrig]['reply'][repcnt] = line
                self._syntax['topic'][topic][ontrig]['reply'][repcnt] = (fname, lineno)
            repcnt += 1
        elif cmd == '%':
            # % PREVIOUS
            pass  # This was handled above.
        elif cmd == '^':
            # ^ CONTINUE
            pass  # This was handled above.
        elif cmd == '@':
            # @ REDIRECT
            if ontrig == '':
                # Same guard as for replies: there is no trigger entry to
                # attach the redirect to.
                self._warn("Redirect found before trigger", fname, lineno)
                continue
            self._say("\tRedirect response to " + line)
            if len(isThat):
                self._thats[topic][isThat][ontrig]['redirect'] = line
                self._syntax['thats'][topic][ontrig]['redirect'] = (fname, lineno)
            else:
                self._topics[topic][ontrig]['redirect'] = line
                self._syntax['topic'][topic][ontrig]['redirect'] = (fname, lineno)
        elif cmd == '*':
            # * CONDITION
            if ontrig == '':
                # Same guard as for replies.
                self._warn("Condition found before trigger", fname, lineno)
                continue
            self._say("\tAdding condition: " + line)
            if len(isThat):
                self._thats[topic][isThat][ontrig]['condition'][concnt] = line
                self._syntax['thats'][topic][ontrig]['condition'][concnt] = (fname, lineno)
            else:
                self._topics[topic][ontrig]['condition'][concnt] = line
                self._syntax['topic'][topic][ontrig]['condition'][concnt] = (fname, lineno)
            concnt += 1
        else:
            self._warn("Unrecognized command \"" + cmd + "\"", fname, lineno)
            continue
633
634 - def check_syntax(self, cmd, line):
635 """Syntax check a RiveScript command and line. 636 637 Returns a syntax error string on error; None otherwise.""" 638 639 # Run syntax checks based on the type of command. 640 if cmd == '!': 641 # ! Definition 642 # - Must be formatted like this: 643 # ! type name = value 644 # OR 645 # ! type = value 646 match = re.match(RE.def_syntax, line) 647 if not match: 648 return "Invalid format for !Definition line: must be '! type name = value' OR '! type = value'" 649 elif cmd == '>': 650 # > Label 651 # - The "begin" label must have only one argument ("begin") 652 # - "topic" labels must be lowercased but can inherit other topics (a-z0-9_\s) 653 # - "object" labels must follow the same rules as "topic", but don't need to be lowercase 654 parts = re.split(" ", line, 2) 655 if parts[0] == "begin" and len(parts) > 1: 656 return "The 'begin' label takes no additional arguments, should be verbatim '> begin'" 657 elif parts[0] == "topic": 658 match = re.match(RE.name_syntax, line) 659 if match: 660 return "Topics should be lowercased and contain only numbers and letters" 661 elif parts[0] == "object": 662 match = re.match(RE.name_syntax, line) 663 if match: 664 return "Objects can only contain numbers and letters" 665 elif cmd == '+' or cmd == '%' or cmd == '@': 666 # + Trigger, % Previous, @ Redirect 667 # This one is strict. The triggers are to be run through the regexp engine, 668 # therefore it should be acceptable for the regexp engine. 669 # - Entirely lowercase 670 # - No symbols except: ( | ) [ ] * _ # @ { } < > = 671 # - All brackets should be matched 672 parens = 0 # Open parenthesis 673 square = 0 # Open square brackets 674 curly = 0 # Open curly brackets 675 angle = 0 # Open angled brackets 676 677 # Count brackets. 
678 for char in line: 679 if char == '(': 680 parens += 1 681 elif char == ')': 682 parens -= 1 683 elif char == '[': 684 square += 1 685 elif char == ']': 686 square -= 1 687 elif char == '{': 688 curly += 1 689 elif char == '}': 690 curly -= 1 691 elif char == '<': 692 angle += 1 693 elif char == '>': 694 angle -= 1 695 696 # Any mismatches? 697 if parens != 0: 698 return "Unmatched parenthesis brackets" 699 elif square != 0: 700 return "Unmatched square brackets" 701 elif curly != 0: 702 return "Unmatched curly brackets" 703 elif angle != 0: 704 return "Unmatched angle brackets" 705 706 # In UTF-8 mode, most symbols are allowed. 707 if self._utf8: 708 match = re.match(RE.utf8_trig, line) 709 if match: 710 return "Triggers can't contain uppercase letters, backslashes or dots in UTF-8 mode." 711 else: 712 match = re.match(RE.trig_syntax, line) 713 if match: 714 return "Triggers may only contain lowercase letters, numbers, and these symbols: ( | ) [ ] * _ # @ { } < > =" 715 elif cmd == '-' or cmd == '^' or cmd == '/': 716 # - Trigger, ^ Continue, / Comment 717 # These commands take verbatim arguments, so their syntax is loose. 718 pass 719 elif cmd == '*': 720 # * Condition 721 # Syntax for a conditional is as follows: 722 # * value symbol value => response 723 match = re.match(RE.cond_syntax, line) 724 if not match: 725 return "Invalid format for !Condition: should be like '* value symbol value => response'" 726 727 return None
728
def deparse(self):
    """Return the in-memory RiveScript document as a Python data structure.

    This would be useful for developing a user interface for editing
    RiveScript replies without having to edit the RiveScript code
    manually.

    Returns a dict with these keys:
      begin:   the definitions (`global`, `var`, `sub`, `person`, `array`)
               plus the BEGIN block's `triggers` and `that` (%Previous)
               entries
      topic:   triggers per topic
      that:    %Previous triggers per topic
      inherit: list of inherited topic names per topic
      include: list of included topic names per topic"""

    # Data to return.
    result = {
        "begin": {
            "global": {},
            "var": {},
            "sub": {},
            "person": {},
            "array": {},
            "triggers": {},
            "that": {},
        },
        "topic": {},
        "that": {},
        "inherit": {},
        "include": {},
    }

    # Populate the config fields, but only when they differ from the
    # defaults.  The depth that is exported is the configured one.
    if self._debug:
        result["begin"]["global"]["debug"] = self._debug
    if self._depth != 50:
        result["begin"]["global"]["depth"] = self._depth

    # Definitions
    result["begin"]["var"] = self._bvars.copy()
    result["begin"]["sub"] = self._subs.copy()
    result["begin"]["person"] = self._person.copy()
    result["begin"]["array"] = self._arrays.copy()
    result["begin"]["global"].update(self._gvars.copy())

    # Topic Triggers.  (.items() is used throughout because it exists on
    # both Python 2 and Python 3 dicts.)
    for topic, triggers in self._topics.items():
        if topic == "__begin__":
            # Begin block.
            dest = result["begin"]["triggers"]
        else:
            # Normal topic.
            dest = result["topic"].setdefault(topic, {})

        # Copy the triggers.
        for trig, data in triggers.items():
            dest[trig] = self._copy_trigger(trig, data)

    # %Previous's.
    for topic, thats in self._thats.items():
        if topic == "__begin__":
            # Begin block.
            dest = result["begin"]["that"]
        else:
            # Normal topic.
            dest = result["that"].setdefault(topic, {})

        # The "that" structure is backwards: bot reply, then trigger, then info.
        for previous, pdata in thats.items():
            for trig, data in pdata.items():
                dest[trig] = self._copy_trigger(trig, data, previous)

    # Inherits/Includes.
    for topic, data in self._lineage.items():
        result["inherit"][topic] = list(data)
    for topic, data in self._includes.items():
        result["include"][topic] = list(data)

    return result
812
def write(self, fh, deparsed=None):
    """Write the currently parsed RiveScript data into a file.

    Pass either a file name (string) or a file handle object. Anything
    without a `write` method is treated as a file name; that file is
    opened as UTF-8 and closed again before returning. A handle passed in
    by the caller is left open.

    This uses `deparse()` to dump a representation of the loaded data and
    writes it to the destination file. If you provide your own data as the
    `deparsed` argument, it will use that data instead of calling
    `deparse()` itself. This way you can use `deparse()`, edit the data,
    and use that to write the RiveScript document (for example, to be used
    by a user interface for editing RiveScript without writing the code
    directly).

    Returns True."""

    # Deparse the loaded data first, so a failure here cannot leave a
    # truncated output file behind.
    if deparsed is None:
        deparsed = self.deparse()

    # Passed a file name instead of a file handle?
    opened_here = not hasattr(fh, "write")
    if opened_here:
        fh = codecs.open(fh, "w", "utf-8")

    try:
        # Start at the beginning.
        fh.write("// Written by rivescript.deparse()\n")
        fh.write("! version = 2.0\n\n")

        # Variables of all sorts!
        for kind in ["global", "var", "sub", "person", "array"]:
            entries = deparsed["begin"][kind]
            if len(entries) == 0:
                continue

            for var in sorted(entries):
                data = entries[var]
                if isinstance(data, bool):
                    # deparse() stores the debug flag as a bool; spell it
                    # the way the parser reads it back ("true").
                    data = "true" if data else "false"
                elif isinstance(data, (list, tuple, set, frozenset)):
                    # Array types need to be separated by either spaces or
                    # pipes: pipes as soon as one item contains a space.
                    if any(" " in item for item in data):
                        data = self._write_wrapped("|".join(data), sep="|")
                    else:
                        data = " ".join(data)
                # Anything else (strings, numbers such as the depth) is
                # formatted as-is below.

                fh.write("! {kind} {var} = {data}\n".format(
                    kind=kind,
                    var=var,
                    data=data,
                ))
            fh.write("\n")

        # Begin block.
        if len(deparsed["begin"]["triggers"]):
            fh.write("> begin\n\n")
            self._write_triggers(fh, deparsed["begin"]["triggers"], indent="\t")
            fh.write("< begin\n\n")

        # The topics. Random first!
        topics = ["random"]
        topics.extend(sorted(t for t in deparsed["topic"] if t != "random"))
        for topic in topics:
            if topic not in deparsed["topic"]:
                continue

            # The default topic only needs a "> topic" tag when it
            # includes or inherits something.
            tagged = (topic != "random" or topic in deparsed["include"]
                      or topic in deparsed["inherit"])

            if tagged:
                fh.write("> topic " + topic)

                if topic in deparsed["inherit"]:
                    fh.write(" inherits " + " ".join(deparsed["inherit"][topic]))
                if topic in deparsed["include"]:
                    fh.write(" includes " + " ".join(deparsed["include"][topic]))

                fh.write("\n\n")

            indent = "\t" if tagged else ""
            self._write_triggers(fh, deparsed["topic"][topic], indent=indent)

            # Any %Previous's?
            if topic in deparsed["that"]:
                self._write_triggers(fh, deparsed["that"][topic], indent=indent)

            if tagged:
                fh.write("< topic\n\n")
    finally:
        # Only close what this method opened itself.
        if opened_here:
            fh.close()

    return True
908
def _copy_trigger(self, trig, data, previous=None):
    """Build a plain copy of everything stored under one trigger.

    The result holds `previous` (when given), the `redirect` (when set),
    and the `condition` and `reply` entries as lists ordered by their
    numeric keys. Empty parts are left out."""
    copied = {}

    if previous:
        copied["previous"] = previous

    # @Redirect
    if "redirect" in data and data["redirect"]:
        copied["redirect"] = data["redirect"]

    # *Condition first, then -Reply: both are dicts keyed by position.
    for part in ("condition", "reply"):
        if part in data and len(data[part].keys()):
            numbered = data[part]
            copied[part] = [numbered[index] for index in sorted(numbered.keys())]

    return copied
934
def _write_triggers(self, fh, triggers, indent=""):
    """Write a set of triggers, in sorted order, to a file handle.

    Each trigger is written as its "+" line followed by its optional
    "%" previous, "*" conditions, "@" redirect and "-" replies, then a
    blank line. Every line is prefixed with `indent`."""

    def emit(symbol, text):
        # One RiveScript command per call, word-wrapped by _write_wrapped.
        fh.write(indent + symbol + " " + self._write_wrapped(text, indent=indent) + "\n")

    for pattern in sorted(triggers.keys()):
        emit("+", pattern)
        entry = triggers[pattern]

        if "previous" in entry:
            emit("%", entry["previous"])

        if "condition" in entry:
            for condition in entry["condition"]:
                emit("*", condition)

        if "redirect" in entry:
            emit("@", entry["redirect"])

        if "reply" in entry:
            for response in entry["reply"]:
                emit("-", response)

        fh.write("\n")
957
def _write_wrapped(self, line, sep=" ", indent="", width=78):
    """Word-wrap a line of RiveScript code for being written to a file.

    str line:   The text to wrap.
    str sep:    Separator the text is split on and re-joined with.
    str indent: Prefix for every continuation line.
    int width:  Maximum length of one wrapped piece.

    Returns the text with each continuation placed on its own line after
    a "^ " marker. When wrapping on spaces, every wrapped line ends in a
    literal backslash-s so the space at the break survives. A single word
    longer than `width` is kept whole on its own line, and an empty `line`
    yields an empty string."""
    lines = []
    buf = []

    for word in line.split(sep):
        # Start a new line when this word would push the current one past
        # the width.  A word is never pushed off an EMPTY line: an
        # over-long word has to be emitted somewhere.
        if buf and len(sep.join(buf + [word])) > width:
            lines.append(sep.join(buf))
            buf = [word]
        else:
            buf.append(word)

    # Straggler?
    tail = sep.join(buf)
    if tail:
        lines.append(tail)

    if not lines:
        return ""

    # Returned output
    eol = "\\s" if sep == " " else ""
    return (eol + "\n" + indent + "^ ").join(lines)
990
def _initTT(self, toplevel, topic, trigger, what=''):
    """Make sure a slot of a Topic Tree data structure exists.

    `toplevel` picks the structure:
      topics: self._topics[topic][trigger]
      thats:  self._thats[topic][trigger][what]
      syntax: self._syntax[what][topic][trigger]
    A slot that is missing is created with empty `reply` and `condition`
    dicts and an empty `redirect`; an existing slot is left untouched.
    Any other `toplevel` value does nothing."""
    if toplevel == 'topics':
        parent = self._topics.setdefault(topic, {})
        slot = trigger
        no_redirect = None
    elif toplevel == 'thats':
        parent = self._thats.setdefault(topic, {}).setdefault(trigger, {})
        slot = what
        no_redirect = {}
    elif toplevel == 'syntax':
        parent = self._syntax.setdefault(what, {}).setdefault(topic, {})
        slot = trigger
        no_redirect = {}
    else:
        return

    if slot not in parent:
        parent[slot] = {}
        parent[slot]['reply'] = {}
        parent[slot]['condition'] = {}
        parent[slot]['redirect'] = no_redirect
1021 1022 ############################################################################ 1023 # Sorting Methods # 1024 ############################################################################ 1025
def sort_replies(self, thats=False):
    """Sort the loaded triggers.

    With `thats` false (the default) this sorts the normal triggers of
    every topic, then re-invokes itself for the %Previous triggers,
    sorts the +triggers paired with each %Previous, and finally sorts
    both substitution lists. With `thats` true only the %Previous
    triggers are sorted.
    """
    # This method can sort both triggers and that's.
    sortlvl = 'thats' if thats else 'topics'
    triglvl = self._thats if thats else self._topics

    # (Re)Initialize the sort cache.
    self._sorted[sortlvl] = {}

    self._say("Sorting triggers...")

    for topic in triglvl:
        self._say("Analyzing topic " + topic)

        # _topic_triggers gathers every trigger this topic has to care
        # about, including those pulled in from other topics. Per the
        # original notes, triggers of inheriting topics carry a fictional
        # {inherits=N} prefix so that _sort_trigger_set can keep them on
        # top of the stack, from inherits=0 to inherits=n.
        alltrig = self._topic_triggers(topic, triglvl)
        running = self._sort_trigger_set(alltrig)

        # Save this topic's sorted list.
        self._sorted.setdefault(sortlvl, {})[topic] = running

    if thats:
        return

    # And do it all again for %Previous! This sorts the %Previous lines
    # to best match the bot's last reply.
    self.sort_replies(True)

    # If any of those %Previous's had more than one +trigger for them,
    # sort those +triggers too.
    self._sort_that_triggers()

    # Also sort both kinds of substitutions.
    self._sort_list('subs', self._subs)
    self._sort_list('person', self._person)
1089
1090 - def _sort_that_triggers(self):
1091 """Make a sorted list of triggers that correspond to %Previous groups.""" 1092 self._say("Sorting reverse triggers for %Previous groups...") 1093 1094 if "that_trig" not in self._sorted: 1095 self._sorted["that_trig"] = {} 1096 1097 for topic in self._thats: 1098 if topic not in self._sorted["that_trig"]: 1099 self._sorted["that_trig"][topic] = {} 1100 1101 for bottrig in self._thats[topic]: 1102 if bottrig not in self._sorted["that_trig"][topic]: 1103 self._sorted["that_trig"][topic][bottrig] = [] 1104 triggers = self._sort_trigger_set(self._thats[topic][bottrig].keys()) 1105 self._sorted["that_trig"][topic][bottrig] = triggers
1106
1107 - def _sort_trigger_set(self, triggers):
1108 """Sort a group of triggers in optimal sorting order.""" 1109 1110 # Create a priority map. 1111 prior = { 1112 0: [] # Default priority=0 1113 } 1114 1115 for trig in triggers: 1116 match, weight = re.search(RE.weight, trig), 0 1117 if match: 1118 weight = int(match.group(1)) 1119 if weight not in prior: 1120 prior[weight] = [] 1121 1122 prior[weight].append(trig) 1123 1124 # Keep a running list of sorted triggers for this topic. 1125 running = [] 1126 1127 # Sort them by priority. 1128 for p in sorted(prior.keys(), reverse=True): 1129 self._say("\tSorting triggers with priority " + str(p)) 1130 1131 # So, some of these triggers may include {inherits} tags, if they 1132 # came form a topic which inherits another topic. Lower inherits 1133 # values mean higher priority on the stack. 1134 inherits = -1 # -1 means no {inherits} tag 1135 highest_inherits = -1 # highest inheritance number seen 1136 1137 # Loop through and categorize these triggers. 1138 track = { 1139 inherits: self._init_sort_track() 1140 } 1141 1142 for trig in prior[p]: 1143 self._say("\t\tLooking at trigger: " + trig) 1144 1145 # See if it has an inherits tag. 1146 match = re.search(RE.inherit, trig) 1147 if match: 1148 inherits = int(match.group(1)) 1149 if inherits > highest_inherits: 1150 highest_inherits = inherits 1151 self._say("\t\t\tTrigger belongs to a topic which inherits other topics: level=" + str(inherits)) 1152 trig = re.sub(RE.inherit, "", trig) 1153 else: 1154 inherits = -1 1155 1156 # If this is the first time we've seen this inheritance level, 1157 # initialize its track structure. 1158 if inherits not in track: 1159 track[inherits] = self._init_sort_track() 1160 1161 # Start inspecting the trigger's contents. 1162 if '_' in trig: 1163 # Alphabetic wildcard included. 
1164 cnt = self._word_count(trig) 1165 self._say("\t\t\tHas a _ wildcard with " + str(cnt) + " words.") 1166 if cnt > 1: 1167 if cnt not in track[inherits]['alpha']: 1168 track[inherits]['alpha'][cnt] = [] 1169 track[inherits]['alpha'][cnt].append(trig) 1170 else: 1171 track[inherits]['under'].append(trig) 1172 elif '#' in trig: 1173 # Numeric wildcard included. 1174 cnt = self._word_count(trig) 1175 self._say("\t\t\tHas a # wildcard with " + str(cnt) + " words.") 1176 if cnt > 1: 1177 if cnt not in track[inherits]['number']: 1178 track[inherits]['number'][cnt] = [] 1179 track[inherits]['number'][cnt].append(trig) 1180 else: 1181 track[inherits]['pound'].append(trig) 1182 elif '*' in trig: 1183 # Wildcard included. 1184 cnt = self._word_count(trig) 1185 self._say("\t\t\tHas a * wildcard with " + str(cnt) + " words.") 1186 if cnt > 1: 1187 if cnt not in track[inherits]['wild']: 1188 track[inherits]['wild'][cnt] = [] 1189 track[inherits]['wild'][cnt].append(trig) 1190 else: 1191 track[inherits]['star'].append(trig) 1192 elif '[' in trig: 1193 # Optionals included. 1194 cnt = self._word_count(trig) 1195 self._say("\t\t\tHas optionals and " + str(cnt) + " words.") 1196 if cnt not in track[inherits]['option']: 1197 track[inherits]['option'][cnt] = [] 1198 track[inherits]['option'][cnt].append(trig) 1199 else: 1200 # Totally atomic. 1201 cnt = self._word_count(trig) 1202 self._say("\t\t\tTotally atomic and " + str(cnt) + " words.") 1203 if cnt not in track[inherits]['atomic']: 1204 track[inherits]['atomic'][cnt] = [] 1205 track[inherits]['atomic'][cnt].append(trig) 1206 1207 # Move the no-{inherits} triggers to the bottom of the stack. 1208 track[highest_inherits + 1] = track[-1] 1209 del(track[-1]) 1210 1211 # Add this group to the sort list. 
1212 for ip in sorted(track.keys()): 1213 self._say("ip=" + str(ip)) 1214 for kind in ['atomic', 'option', 'alpha', 'number', 'wild']: 1215 for wordcnt in sorted(track[ip][kind], reverse=True): 1216 # Triggers with a matching word count should be sorted 1217 # by length, descending. 1218 running.extend(sorted(track[ip][kind][wordcnt], key=len, reverse=True)) 1219 running.extend(sorted(track[ip]['under'], key=len, reverse=True)) 1220 running.extend(sorted(track[ip]['pound'], key=len, reverse=True)) 1221 running.extend(sorted(track[ip]['star'], key=len, reverse=True)) 1222 return running
1223
1224 - def _sort_list(self, name, items):
1225 """Sort a simple list by number of words and length.""" 1226 1227 def by_length(word1, word2): 1228 return len(word2) - len(word1)
1229 1230 # Initialize the list sort buffer. 1231 if "lists" not in self._sorted: 1232 self._sorted["lists"] = {} 1233 self._sorted["lists"][name] = [] 1234 1235 # Track by number of words. 1236 track = {} 1237 1238 # Loop through each item. 1239 for item in items: 1240 # Count the words. 1241 cword = self._word_count(item, all=True) 1242 if cword not in track: 1243 track[cword] = [] 1244 track[cword].append(item) 1245 1246 # Sort them. 1247 output = [] 1248 for count in sorted(track.keys(), reverse=True): 1249 sort = sorted(track[count], key=len, reverse=True) 1250 output.extend(sort) 1251 1252 self._sorted["lists"][name] = output
1253
1254 - def _init_sort_track(self):
1255 """Returns a new dict for keeping track of triggers for sorting.""" 1256 return { 1257 'atomic': {}, # Sort by number of whole words 1258 'option': {}, # Sort optionals by number of words 1259 'alpha': {}, # Sort alpha wildcards by no. of words 1260 'number': {}, # Sort number wildcards by no. of words 1261 'wild': {}, # Sort wildcards by no. of words 1262 'pound': [], # Triggers of just # 1263 'under': [], # Triggers of just _ 1264 'star': [] # Triggers of just * 1265 }
1266 1267 1268 ############################################################################ 1269 # Public Configuration Methods # 1270 ############################################################################ 1271
def set_handler(self, language, obj):
    """Define a custom language handler for RiveScript objects.

    language: The lowercased name of the programming language,
              e.g. python, javascript, perl
    obj:      An instance of a class object that provides the following interface:

        class MyObjectHandler:
            def __init__(self):
                pass
            def load(self, name, code):
                # name = the name of the object from the RiveScript code
                # code = the source code of the object
            def call(self, rs, name, fields):
                # rs     = the current RiveScript interpreter object
                # name   = the name of the object being called
                # fields = array of arguments passed to the object
                return reply

    Pass in a None value for the object to delete an existing handler (for example,
    to prevent Python code from being able to be run by default).

    Look in the `eg` folder of the rivescript-python distribution for an example
    script that sets up a JavaScript language handler."""
    if obj is not None:
        self._handlers[language] = obj
        return

    # None means "remove"; removing an unknown language is a no-op.
    self._handlers.pop(language, None)
1303
def set_subroutine(self, name, code):
    """Define a Python object from your program.

    This is equivalent to having an object defined in the RiveScript code, except
    your Python code is defining it instead. `name` is the name of the object, and
    `code` is a Python function (a `def`) that accepts rs,args as its parameters.

    This method is only available if there is a Python handler set up (which there
    is by default, unless you've called set_handler("python", None)). Without one
    a warning is issued and nothing is stored."""
    handlers = self._handlers
    if 'python' not in handlers:
        self._warn("Can't set_subroutine: no Python object handler!")
        return

    handlers['python']._objects[name] = code
1319
def set_global(self, name, value):
    """Set a global variable.

    Equivalent to `! global` in RiveScript code. Set to None to delete.

    name:  Name of the global variable.
    value: New value, or None to remove the variable (removing a
           variable that is not set is a no-op).
    """
    if value is None:
        # Unset the variable; do NOT fall through and store None.
        self._gvars.pop(name, None)
    else:
        self._gvars[name] = value
1329
def set_variable(self, name, value):
    """Set a bot variable.

    Equivalent to `! var` in RiveScript code. Set to None to delete.

    name:  Name of the bot variable.
    value: New value, or None to remove the variable (removing a
           variable that is not set is a no-op).
    """
    if value is None:
        # Unset the variable; do NOT fall through and store None.
        self._bvars.pop(name, None)
    else:
        self._bvars[name] = value
1339
def set_substitution(self, what, rep):
    """Set a substitution.

    Equivalent to `! sub` in RiveScript code. Set to None to delete.

    what: The text to be substituted.
    rep:  The replacement text, or None to remove the substitution
          (removing an unknown substitution is a no-op).

    The sorted list that substitution actually iterates is rebuilt by
    `sort_replies()`, so call that again after changing substitutions.
    """
    if rep is None:
        # Unset the substitution; do NOT fall through and store None.
        self._subs.pop(what, None)
        return

    self._subs[what] = rep

    # _substitute looks the pattern up in the precompiled regexp cache,
    # so make sure an entry exists for patterns added through the API.
    self._precompile_substitution("subs", what)
1349
def set_person(self, what, rep):
    """Set a person substitution.

    Equivalent to `! person` in RiveScript code. Set to None to delete.

    what: The text to be substituted.
    rep:  The replacement text, or None to remove the substitution
          (removing an unknown substitution is a no-op).

    The sorted list that substitution actually iterates is rebuilt by
    `sort_replies()`, so call that again after changing substitutions.
    """
    if rep is None:
        # Unset the substitution; do NOT fall through and store None.
        self._person.pop(what, None)
        return

    self._person[what] = rep

    # _substitute looks the pattern up in the precompiled regexp cache,
    # so make sure an entry exists for patterns added through the API.
    self._precompile_substitution("person", what)
1359
def set_uservar(self, user, name, value):
    """Set a variable for a user.

    A user that is not known yet is first given a fresh profile whose
    topic is "random"."""
    profile = self._users.setdefault(user, {"topic": "random"})
    profile[name] = value
1367
def get_uservar(self, user, name):
    """Get a variable about a user.

    Returns None when the user has no data at all, and the string
    'undefined' when the user exists but has no value set for `name`."""
    if user not in self._users:
        return None
    return self._users[user].get(name, "undefined")
1381
def get_uservars(self, user=None):
    """Get all variables about a user (or all users).

    Without a username the entire user database structure is returned.
    Otherwise only the variables of the given user are returned, or
    None if nothing is stored for that user. The returned objects are
    the live internal dictionaries, not copies."""
    if user is None:
        # All the users!
        return self._users

    # Just this one -- or None when there is no info.
    return self._users.get(user)
1397
def clear_uservars(self, user=None):
    """Delete all variables about a user (or all users).

    If no username is passed, deletes all variables about all users. Otherwise, only
    deletes all variables for the given user; an unknown user is left alone.

    A cleared user is put back into the default state of a new profile
    (topic "random") rather than an empty dict, because the reply code
    reads the user's 'topic' unconditionally once the user is known."""
    if user is None:
        # All the users!
        self._users = {}
    elif user in self._users:
        # Just this one: back to a pristine profile.
        self._users[user] = {"topic": "random"}
1410
def freeze_uservars(self, user):
    """Freeze the variable state for a user.

    This will clone (deep copy) and preserve a user's entire variable state, so
    that it can be restored later with `thaw_uservars`. A warning is issued
    when the user is unknown."""
    if user not in self._users:
        self._warn("Can't freeze vars for user " + user + ": not found!")
        return

    # Clone the user's data.
    self._freeze[user] = copy.deepcopy(self._users[user])
1422
def thaw_uservars(self, user, action="thaw"):
    """Thaw a user's frozen variables.

    The `action` can be one of the following options:

        discard: Don't restore the user's variables, just delete the frozen copy.
        keep:    Keep the frozen copy after restoring the variables.
        thaw:    Restore the variables, then delete the frozen copy (default).

    A warning is issued for any other action, or when the user has no
    frozen copy."""
    if user not in self._freeze:
        self._warn("Can't thaw vars for user " + user + ": not found!")
        return

    if action in ("thaw", "keep"):
        # Restore from a deep copy so the frozen state stays pristine.
        self.clear_uservars(user)
        self._users[user] = copy.deepcopy(self._freeze[user])
        if action == "thaw":
            del self._freeze[user]
    elif action == "discard":
        # Just discard the frozen copy.
        del self._freeze[user]
    else:
        self._warn("Unsupported thaw action")
1450
def last_match(self, user):
    """Get the last trigger matched for the user.

    This looks up the user variable "__lastmatch__" through `get_uservar`
    and returns it as-is: the raw trigger text that the user's last message
    matched, or None if there was no match."""
    variable = "__lastmatch__"
    return self.get_uservar(user, variable)
1457
def trigger_info(self, trigger=None, dump=False):
    """Get information about a trigger.

    Pass in a raw trigger to find out what file name and line number it appeared at.
    This is useful for e.g. tracking down the location of the trigger last matched
    by the user via last_match(). Returns a list of matching triggers, containing
    their topics, filenames and line numbers. Returns None if there weren't
    any matches found.

    Each entry of the list is a dict with these keys:

    * category: The top-level key of the syntax tracking tree it was found under
    * topic: The topic name
    * trigger: The raw trigger text
    * filename: The filename the trigger was found in.
    * line: The line number the trigger was found on.

    Pass in a true value for `dump`, and the entire syntax tracking
    tree is returned."""
    if dump:
        return self._syntax

    # Search the syntax tree for the trigger.
    hits = []
    for category, topics in self._syntax.items():
        for topic, triggers in topics.items():
            if trigger not in triggers:
                continue

            # We got a match!
            fname, lineno = triggers[trigger]['trigger']
            hits.append({
                'category': category,
                'topic': topic,
                'trigger': trigger,
                'filename': fname,
                'line': lineno,
            })

    return hits or None
1499
def current_user(self):
    """Retrieve the user ID of the current user talking to your bot.

    This is mostly useful inside of a Python object macro to get the user ID of the
    person who caused the object macro to be invoked (i.e. to set a variable for
    that user from within the object).

    When no user is currently set this emits a warning and returns None."""
    user = self._current_user
    if user is None:
        # They're doing it wrong.
        self._warn("current_user() is meant to be used from within a Python object macro!")
    return user
1513 1514 ############################################################################ 1515 # Reply Fetching Methods # 1516 ############################################################################ 1517
def reply(self, user, msg, errors_as_replies=True):
    """Fetch a reply from the RiveScript brain.

    user: The user ID of the person sending the message.
    msg:  The raw message from the user.
    errors_as_replies: When true, a RiveScriptError raised while fetching
        the reply to `msg` is turned into its `error_message` and returned
        as the reply; when false it is re-raised. The flag is also passed
        down as `ignore_object_errors`. Note that the lookup of the BEGIN
        block itself is not guarded by this flag.

    Returns the reply text. The formatted message and the reply are
    pushed onto the front of the user's input/reply history, which is
    capped at nine entries each.

    The current user is recorded for the duration of the call (see
    `current_user`) and is always unset again afterwards, even when an
    exception propagates out of this method.
    """
    self._say("Get reply to [" + user + "] " + msg)

    # Store the current user in case an object macro needs it.
    self._current_user = user

    try:
        # Format their message.
        msg = self._format_message(msg)

        reply = ''

        # If the BEGIN block exists, consult it first.
        if "__begin__" in self._topics:
            begin = self._getreply(user, 'request', context='begin', ignore_object_errors=errors_as_replies)

            # Okay to continue?
            if '{ok}' in begin:
                try:
                    reply = self._getreply(user, msg, ignore_object_errors=errors_as_replies)
                except RiveScriptError as e:
                    if not errors_as_replies:
                        raise
                    reply = e.error_message
                begin = begin.replace('{ok}', reply)

            reply = begin

            # Run more tag substitutions.
            reply = self._process_tags(user, msg, reply, ignore_object_errors=errors_as_replies)
        else:
            # Just continue then.
            try:
                reply = self._getreply(user, msg, ignore_object_errors=errors_as_replies)
            except RiveScriptError as e:
                if not errors_as_replies:
                    raise
                reply = e.error_message

        # Save their reply history: newest first, keeping eight older entries.
        history = self._users[user]['__history__']
        history['input'] = [msg] + history['input'][:8]
        history['reply'] = [reply] + history['reply'][:8]
    finally:
        # Unset the current user, whatever happened above.
        self._current_user = None

    return reply
1569
1570 - def _format_message(self, msg, botreply=False):
1571 """Format a user's message for safe processing.""" 1572 1573 # Make sure the string is Unicode for Python 2. 1574 if sys.version_info[0] < 3 and isinstance(msg, str): 1575 msg = msg.decode('utf8') 1576 1577 # Lowercase it. 1578 msg = msg.lower() 1579 1580 # Run substitutions on it. 1581 msg = self._substitute(msg, "subs") 1582 1583 # In UTF-8 mode, only strip metacharacters and HTML brackets 1584 # (to protect from obvious XSS attacks). 1585 if self._utf8: 1586 msg = re.sub(RE.utf8_meta, '', msg) 1587 1588 # For the bot's reply, also strip common punctuation. 1589 if botreply: 1590 msg = re.sub(RE.utf8_punct, '', msg) 1591 else: 1592 # For everything else, strip all non-alphanumerics. 1593 msg = self._strip_nasties(msg) 1594 1595 return msg
1596
1597 - def _getreply(self, user, msg, context='normal', step=0, ignore_object_errors=True):
1598 # Needed to sort replies? 1599 if 'topics' not in self._sorted: 1600 raise RepliesNotSortedError("You must call sort_replies() once you are done loading RiveScript documents") 1601 1602 # Initialize the user's profile? 1603 if user not in self._users: 1604 self._users[user] = {'topic': 'random'} 1605 1606 # Collect data on the user. 1607 topic = self._users[user]['topic'] 1608 stars = [] 1609 thatstars = [] # For %Previous's. 1610 reply = '' 1611 1612 # Avoid letting them fall into a missing topic. 1613 if topic not in self._topics: 1614 self._warn("User " + user + " was in an empty topic named '" + topic + "'") 1615 topic = self._users[user]['topic'] = 'random' 1616 1617 # Avoid deep recursion. 1618 if step > self._depth: 1619 raise DeepRecursionError 1620 1621 # Are we in the BEGIN statement? 1622 if context == 'begin': 1623 topic = '__begin__' 1624 1625 # Initialize this user's history. 1626 if '__history__' not in self._users[user]: 1627 self._users[user]['__history__'] = { 1628 'input': [ 1629 'undefined', 'undefined', 'undefined', 'undefined', 1630 'undefined', 'undefined', 'undefined', 'undefined', 1631 'undefined' 1632 ], 1633 'reply': [ 1634 'undefined', 'undefined', 'undefined', 'undefined', 1635 'undefined', 'undefined', 'undefined', 'undefined', 1636 'undefined' 1637 ] 1638 } 1639 1640 # More topic sanity checking. 1641 if topic not in self._topics: 1642 # This was handled before, which would mean topic=random and 1643 # it doesn't exist. Serious issue! 1644 raise NoDefaultRandomTopicError("no default topic 'random' was found") 1645 1646 # Create a pointer for the matched data when we find it. 1647 matched = None 1648 matchedTrigger = None 1649 foundMatch = False 1650 1651 # See if there were any %Previous's in this topic, or any topic related 1652 # to it. This should only be done the first time -- not during a 1653 # recursive redirection. 
This is because in a redirection, "lastreply" 1654 # is still gonna be the same as it was the first time, causing an 1655 # infinite loop! 1656 if step == 0: 1657 allTopics = [topic] 1658 if topic in self._includes or topic in self._lineage: 1659 # Get all the topics! 1660 allTopics = self._get_topic_tree(topic) 1661 1662 # Scan them all! 1663 for top in allTopics: 1664 self._say("Checking topic " + top + " for any %Previous's.") 1665 if top in self._sorted["thats"]: 1666 self._say("There is a %Previous in this topic!") 1667 1668 # Do we have history yet? 1669 lastReply = self._users[user]["__history__"]["reply"][0] 1670 1671 # Format the bot's last reply the same way as the human's. 1672 lastReply = self._format_message(lastReply, botreply=True) 1673 1674 self._say("lastReply: " + lastReply) 1675 1676 # See if it's a match. 1677 for trig in self._sorted["thats"][top]: 1678 botside = self._reply_regexp(user, trig) 1679 self._say("Try to match lastReply (" + lastReply + ") to " + trig) 1680 1681 # Match?? 1682 match = re.match(botside, lastReply) 1683 if match: 1684 # Huzzah! See if OUR message is right too. 1685 self._say("Bot side matched!") 1686 thatstars = match.groups() 1687 for subtrig in self._sorted["that_trig"][top][trig]: 1688 humanside = self._reply_regexp(user, subtrig) 1689 self._say("Now try to match " + msg + " to " + subtrig) 1690 1691 match = re.match(humanside, msg) 1692 if match: 1693 self._say("Found a match!") 1694 matched = self._thats[top][trig][subtrig] 1695 matchedTrigger = subtrig 1696 foundMatch = True 1697 1698 # Get the stars! 1699 stars = match.groups() 1700 break 1701 1702 # Break if we found a match. 1703 if foundMatch: 1704 break 1705 # Break if we found a match. 1706 if foundMatch: 1707 break 1708 1709 # Search their topic for a match to their trigger. 1710 if not foundMatch: 1711 for trig in self._sorted["topics"][topic]: 1712 # Process the triggers. 
1713 regexp = self._reply_regexp(user, trig) 1714 self._say("Try to match %r against %r (%r)" % (msg, trig, regexp)) 1715 1716 # Python's regular expression engine is slow. Try a verbatim 1717 # match if this is an atomic trigger. 1718 isAtomic = self._is_atomic(trig) 1719 isMatch = False 1720 if isAtomic: 1721 # Only look for exact matches, no sense running atomic triggers 1722 # through the regexp engine. 1723 if msg == trig: 1724 isMatch = True 1725 else: 1726 # Non-atomic triggers always need the regexp. 1727 match = re.match(regexp, msg) 1728 if match: 1729 # The regexp matched! 1730 isMatch = True 1731 1732 # Collect the stars. 1733 stars = match.groups() 1734 1735 if isMatch: 1736 self._say("Found a match!") 1737 1738 # We found a match, but what if the trigger we've matched 1739 # doesn't belong to our topic? Find it! 1740 if trig not in self._topics[topic]: 1741 # We have to find it. 1742 matched = self._find_trigger_by_inheritance(topic, trig) 1743 else: 1744 # We do have it! 1745 matched = self._topics[topic][trig] 1746 1747 foundMatch = True 1748 matchedTrigger = trig 1749 break 1750 1751 # Store what trigger they matched on. If their matched trigger is None, 1752 # this will be too, which is great. 1753 self._users[user]["__lastmatch__"] = matchedTrigger 1754 1755 if matched: 1756 for nil in [1]: 1757 # See if there are any hard redirects. 1758 if matched["redirect"]: 1759 self._say("Redirecting us to " + matched["redirect"]) 1760 redirect = self._process_tags(user, msg, matched["redirect"], stars, thatstars, step, 1761 ignore_object_errors) 1762 self._say("Pretend user said: " + redirect) 1763 reply = self._getreply(user, redirect, step=(step + 1), ignore_object_errors=ignore_object_errors) 1764 break 1765 1766 # Check the conditionals. 
1767 for con in sorted(matched["condition"]): 1768 halves = re.split(RE.cond_split, matched["condition"][con]) 1769 if halves and len(halves) == 2: 1770 condition = re.match(RE.cond_parse, halves[0]) 1771 if condition: 1772 left = condition.group(1) 1773 eq = condition.group(2) 1774 right = condition.group(3) 1775 potreply = halves[1] 1776 self._say("Left: " + left + "; eq: " + eq + "; right: " + right + " => " + potreply) 1777 1778 # Process tags all around. 1779 left = self._process_tags(user, msg, left, stars, thatstars, step, ignore_object_errors) 1780 right = self._process_tags(user, msg, right, stars, thatstars, step, ignore_object_errors) 1781 1782 # Defaults? 1783 if len(left) == 0: 1784 left = 'undefined' 1785 if len(right) == 0: 1786 right = 'undefined' 1787 1788 self._say("Check if " + left + " " + eq + " " + right) 1789 1790 # Validate it. 1791 passed = False 1792 if eq == 'eq' or eq == '==': 1793 if left == right: 1794 passed = True 1795 elif eq == 'ne' or eq == '!=' or eq == '<>': 1796 if left != right: 1797 passed = True 1798 else: 1799 # Gasp, dealing with numbers here... 1800 try: 1801 left, right = int(left), int(right) 1802 if eq == '<': 1803 if left < right: 1804 passed = True 1805 elif eq == '<=': 1806 if left <= right: 1807 passed = True 1808 elif eq == '>': 1809 if left > right: 1810 passed = True 1811 elif eq == '>=': 1812 if left >= right: 1813 passed = True 1814 except: 1815 self._warn("Failed to evaluate numeric condition!") 1816 1817 # How truthful? 1818 if passed: 1819 reply = potreply 1820 break 1821 1822 # Have our reply yet? 1823 if len(reply) > 0: 1824 break 1825 1826 # Process weights in the replies. 
1827 bucket = [] 1828 for rep in sorted(matched["reply"]): 1829 text = matched["reply"][rep] 1830 weight = 1 1831 match = re.match(RE.weight, text) 1832 if match: 1833 weight = int(match.group(1)) 1834 if weight <= 0: 1835 self._warn("Can't have a weight <= 0!") 1836 weight = 1 1837 for i in range(0, weight): 1838 bucket.append(text) 1839 1840 # Get a random reply. 1841 reply = random.choice(bucket) 1842 break 1843 1844 # Still no reply? 1845 if not foundMatch: 1846 raise NoMatchError 1847 elif len(reply) == 0: 1848 raise NoReplyError 1849 1850 self._say("Reply: " + reply) 1851 1852 # Process tags for the BEGIN block. 1853 if context == "begin": 1854 # BEGIN blocks can only set topics and uservars. The rest happen 1855 # later! 1856 reTopic = re.findall(RE.topic_tag, reply) 1857 for match in reTopic: 1858 self._say("Setting user's topic to " + match) 1859 self._users[user]["topic"] = match 1860 reply = reply.replace('{{topic={match}}}'.format(match=match), '') 1861 1862 reSet = re.findall(RE.set_tag, reply) 1863 for match in reSet: 1864 self._say("Set uservar " + str(match[0]) + "=" + str(match[1])) 1865 self._users[user][match[0]] = match[1] 1866 reply = reply.replace('<set {key}={value}>'.format(key=match[0], value=match[1]), '') 1867 else: 1868 # Process more tags if not in BEGIN. 1869 reply = self._process_tags(user, msg, reply, stars, thatstars, step, ignore_object_errors) 1870 1871 return reply
1872
def _substitute(self, msg, kind):
    """Run a kind of substitution on a message.

    kind: 'subs' to use the regular substitutions; any other value uses
          the person substitutions.

    Every pattern of the sorted list for `kind` is first swapped for a
    numbered placeholder; only once all patterns have been tried are the
    placeholders replaced by their substitution results. The final
    message is returned stripped.

    Raises RepliesNotSortedError when the sorted list for `kind` doesn't
    exist yet."""
    # Safety checking.
    if 'lists' not in self._sorted or kind not in self._sorted["lists"]:
        raise RepliesNotSortedError("You must call sort_replies() once you are done loading RiveScript documents")

    # Get the substitution map.
    subs = self._subs if kind == 'subs' else self._person

    # Make placeholders each time we substitute something.
    ph = []
    for index, pattern in enumerate(self._sorted["lists"][kind]):
        ph.append(subs[pattern])
        placeholder = "\x00%d\x00" % index

        cache = self._regexc[kind][pattern]
        msg = re.sub(cache["sub1"], placeholder, msg)
        msg = re.sub(cache["sub2"], placeholder + r'\1', msg)
        msg = re.sub(cache["sub3"], r'\1' + placeholder + r'\2', msg)
        msg = re.sub(cache["sub4"], r'\1' + placeholder, msg)

    # Swap the placeholders for the real results.
    for match in re.findall(RE.placeholder, msg):
        msg = msg.replace('\x00' + match + '\x00', ph[int(match)])

    # Strip & return.
    return msg.strip()
1915
1916 - def _precompile_substitution(self, kind, pattern):
1917 """Pre-compile the regexp for a substitution pattern. 1918 1919 This will speed up the substitutions that happen at the beginning of 1920 the reply fetching process. With the default brain, this took the 1921 time for _substitute down from 0.08s to 0.02s""" 1922 if pattern not in self._regexc[kind]: 1923 qm = re.escape(pattern) 1924 self._regexc[kind][pattern] = { 1925 "qm": qm, 1926 "sub1": re.compile(r'^' + qm + r'$'), 1927 "sub2": re.compile(r'^' + qm + r'(\W+)'), 1928 "sub3": re.compile(r'(\W+)' + qm + r'(\W+)'), 1929 "sub4": re.compile(r'(\W+)' + qm + r'$'), 1930 }
1931
def _reply_regexp(self, user, regexp):
    """Prepare a trigger for the regular expression engine.

    Converts RiveScript trigger syntax (wildcards, optionals, arrays,
    ``<bot>``/``<get>`` variables and ``<input>``/``<reply>`` history tags)
    into a compiled, fully anchored regular expression.

    :param user: The user ID whose variables and history are substituted
        into ``<get>``, ``<input>`` and ``<reply>`` tags. It is only used
        when the trigger actually contains such tags.
    :param regexp: The raw trigger text.
    :return: A compiled pattern. If the trigger is already present in
        ``self._regexc["trigger"]``, the cached pattern is returned as-is.
    """

    if regexp in self._regexc["trigger"]:
        # Already compiled this one!
        return self._regexc["trigger"][regexp]

    # If the trigger is simply '*' then the * there needs to become (.*?)
    # to match the blank string too.
    regexp = re.sub(RE.zero_star, r'<zerowidthstar>', regexp)

    # Simple replacements.
    regexp = regexp.replace('*', r'(.+?)')   # Convert * into (.+?)
    regexp = regexp.replace('#', r'(\d+?)')  # Convert # into (\d+?)
    regexp = regexp.replace('_', r'(\w+?)')  # Convert _ into (\w+?)
    regexp = re.sub(r'\{weight=\d+\}', '', regexp)  # Remove {weight} tags
    regexp = regexp.replace('<zerowidthstar>', r'(.*?)')

    # Optionals.
    for match in re.findall(RE.optionals, regexp):
        alternatives = [
            r'(?:\s|\b)+' + part + r'(?:\s|\b)+'
            for part in match.split("|")
        ]

        # If this optional had a star or anything in it, make it
        # non-matching. Plain str.replace is used on purpose: these are
        # literal substitutions, and running them through re.sub() makes
        # Python 3.7+ reject the "\d" / "\w" in the replacement template.
        pipes = '|'.join(alternatives)
        pipes = pipes.replace(r'(.+?)', r'(?:.+?)')
        pipes = pipes.replace(r'(\d+?)', r'(?:\d+?)')
        # "_" is still "(\w+?)" at this point (it only becomes [A-Za-z]
        # further down), so it has to be handled in that form as well.
        pipes = pipes.replace(r'(\w+?)', r'(?:\w+?)')
        pipes = pipes.replace(r'([A-Za-z]+?)', r'(?:[A-Za-z]+?)')

        # Insert the replacement through a callable so that its
        # backslashes are taken literally instead of being parsed as a
        # substitution template.
        replacement = '(?:' + pipes + r'|(?:\s|\b))'
        regexp = re.sub(r'\s*\[' + re.escape(match) + r'\]\s*',
                        lambda _match: replacement, regexp)

    # _ wildcards can't match numbers!
    regexp = re.sub(RE.literal_w, r'[A-Za-z]', regexp)

    # Filter in arrays.
    for array in re.findall(RE.array, regexp):
        rep = ''
        if array in self._arrays:
            rep = r'(?:' + '|'.join(self._arrays[array]) + ')'
        # A callable keeps the array items literal (no template parsing).
        regexp = re.sub(r'\@' + re.escape(array) + r'\b',
                        lambda _match, rep=rep: rep, regexp)

    # Filter in bot variables.
    for var in re.findall(RE.bot_tag, regexp):
        rep = ''
        if var in self._bvars:
            rep = self._strip_nasties(self._bvars[var])
        regexp = regexp.replace('<bot {var}>'.format(var=var), rep)

    # Filter in user variables.
    for var in re.findall(RE.get_tag, regexp):
        rep = ''
        if var in self._users[user]:
            rep = self._strip_nasties(self._users[user][var])
        regexp = regexp.replace('<get {var}>'.format(var=var), rep)

    # Filter in <input> and <reply> tags. This is a slow process, so only
    # do it if we have to!
    if '<input' in regexp or '<reply' in regexp:
        for kind in ['input', 'reply']:
            tags = re.findall(r'<' + kind + r'([0-9])>', regexp)
            for index in tags:
                rep = self._format_message(self._users[user]['__history__'][kind][int(index) - 1])
                regexp = regexp.replace('<{type}{index}>'.format(type=kind, index=index), rep)
            regexp = regexp.replace('<{type}>'.format(type=kind),
                                    self._format_message(self._users[user]['__history__'][kind][0]))
            # TODO: the Perl version doesn't do just <input>/<reply> in trigs!

    return re.compile(r'^' + regexp + r'$')
2009
2010 - def _precompile_regexp(self, trigger):
2011 """Precompile the regex for most triggers. 2012 2013 If the trigger is non-atomic, and doesn't include dynamic tags like 2014 `<bot>`, `<get>`, `<input>/<reply>` or arrays, it can be precompiled 2015 and save time when matching.""" 2016 if self._is_atomic(trigger): 2017 return # Don't need a regexp for atomic triggers. 2018 2019 # Check for dynamic tags. 2020 for tag in ["@", "<bot", "<get", "<input", "<reply"]: 2021 if tag in trigger: 2022 return # Can't precompile this trigger. 2023 2024 self._regexc["trigger"][trigger] = self._reply_regexp(None, trigger)
2025
def _process_tags(self, user, msg, reply, st=None, bst=None, depth=0, ignore_object_errors=True):
    """Post process tags in a message.

    Tags are expanded in this order: tag shortcuts, ``{weight}``,
    ``<star>``/``<botstar>``, ``<input>``/``<reply>`` history, ``<id>`` and
    escape codes, ``{random}``, person/string formatting, the variable tags
    (``<bot>``, ``<env>``, ``<set>``, ``<get>`` and the math tags),
    ``{topic}``, inline redirects ``{@...}`` and finally ``<call>`` objects.

    :param user: The user ID; indexes ``self._users``.
    :param msg: The user's message (not used by this method).
    :param reply: The raw reply text to process.
    :param st: Values for ``<star>`` tags; ``<starN>`` maps to ``st[N-1]``.
    :param bst: Values for ``<botstar>`` tags, same layout as ``st``.
    :param depth: Current recursion depth; ``depth + 1`` is passed as
        ``step`` to ``self._getreply`` for inline redirects.
    :param ignore_object_errors: When false, object call failures raise
        ``ObjectError`` instead of inserting an error string.
    :return: The processed reply text.
    :raises ObjectError: On a failed ``<call>`` when
        ``ignore_object_errors`` is false.
    """
    # Index 0 is a placeholder so that <starN> maps straight to stars[N].
    stars = ['']
    stars.extend(st or [])
    botstars = ['']
    botstars.extend(bst or [])
    if len(stars) == 1:
        stars.append("undefined")
    if len(botstars) == 1:
        botstars.append("undefined")

    # Tag shortcuts.
    reply = reply.replace('<person>', '{person}<star>{/person}')
    reply = reply.replace('<@>', '{@<star>}')
    reply = reply.replace('<formal>', '{formal}<star>{/formal}')
    reply = reply.replace('<sentence>', '{sentence}<star>{/sentence}')
    reply = reply.replace('<uppercase>', '{uppercase}<star>{/uppercase}')
    reply = reply.replace('<lowercase>', '{lowercase}<star>{/lowercase}')

    # Weight and <star> tags.
    reply = re.sub(RE.weight, '', reply)  # Leftover {weight}s
    reply = reply.replace('<star>', stars[1])
    for match in re.findall(RE.star_tags, reply):
        if int(match) < len(stars):
            reply = reply.replace('<star{match}>'.format(match=match), stars[int(match)])
    reply = reply.replace('<botstar>', botstars[1])
    for match in re.findall(RE.botstars, reply):
        if int(match) < len(botstars):
            reply = reply.replace('<botstar{match}>'.format(match=match), botstars[int(match)])

    # <input> and <reply>
    reply = reply.replace('<input>', self._users[user]['__history__']['input'][0])
    reply = reply.replace('<reply>', self._users[user]['__history__']['reply'][0])
    for match in re.findall(RE.input_tags, reply):
        reply = reply.replace('<input{match}>'.format(match=match),
                              self._users[user]['__history__']['input'][int(match) - 1])
    for match in re.findall(RE.reply_tags, reply):
        reply = reply.replace('<reply{match}>'.format(match=match),
                              self._users[user]['__history__']['reply'][int(match) - 1])

    # <id> and escape codes.
    reply = reply.replace('<id>', user)
    reply = reply.replace('\\s', ' ')
    reply = reply.replace('\\n', "\n")
    reply = reply.replace('\\#', '#')

    # Random bits.
    for match in re.findall(RE.random_tags, reply):
        if '|' in match:
            output = random.choice(match.split('|'))
        else:
            output = random.choice(match.split(' '))
        reply = reply.replace('{{random}}{match}{{/random}}'.format(match=match), output)

    # Person Substitutions and String Formatting.
    for item in ['person', 'formal', 'sentence', 'uppercase', 'lowercase']:
        matcher = re.findall(r'\{' + item + r'\}(.+?)\{/' + item + r'\}', reply)
        for match in matcher:
            if item == 'person':
                # Person substitutions.
                output = self._substitute(match, "person")
            else:
                output = self._string_format(match, item)
            reply = reply.replace('{{{item}}}{match}{{/{item}}}'.format(item=item, match=match), output)

    # Handle all variable-related tags with an iterative regex approach,
    # to allow for nesting of tags in arbitrary ways (think <set a=<get b>>)
    # Dummy out the <call> tags first, because we don't handle them right
    # here.
    reply = reply.replace("<call>", "{__call__}")
    reply = reply.replace("</call>", "{/__call__}")
    while True:
        # This regex will match a <tag> which contains no other tag inside
        # it, i.e. in the case of <set a=<get b>> it will match <get b> but
        # not the <set> tag, on the first pass. The second pass will get the
        # <set> tag, and so on.
        match = re.search(RE.tag_search, reply)
        if not match:
            break  # No remaining tags!

        match = match.group(1)
        parts = match.split(" ", 1)
        tag = parts[0].lower()
        data = parts[1] if len(parts) > 1 else ""
        insert = ""  # Result of the tag evaluation

        # Handle the tags.
        if tag == "bot" or tag == "env":
            # <bot> and <env> tags are similar.
            target = self._bvars if tag == "bot" else self._gvars
            if "=" in data:
                # Setting a bot/env variable. Only the first "=" separates
                # the name from the value, so values may contain "=".
                name, _sep, value = data.partition("=")
                self._say("Set " + tag + " variable " + text_type(name) + "=" + text_type(value))
                target[name] = value
            else:
                # Getting a bot/env variable.
                insert = target.get(data, "undefined")
        elif tag == "set":
            # <set> user vars.
            if "=" in data:
                name, _sep, value = data.partition("=")
                self._say("Set uservar " + text_type(name) + "=" + text_type(value))
                self._users[user][name] = value
            else:
                # Without "=" there is nothing to assign; warn, don't crash.
                self._warn("Malformed <set> tag: " + text_type(match))
        elif tag in ("add", "sub", "mult", "div"):
            # Math operator tags.
            var, _sep, value = data.partition("=")

            # Sanity check the value.
            try:
                value = int(value)
            except ValueError:
                insert = "[ERR: Math can't '{}' non-numeric value '{}']".format(tag, value)
            else:
                if var not in self._users[user]:
                    # Initialize it.
                    self._users[user][var] = 0
                current = self._users[user][var]

                # Attempt the operation.
                try:
                    orig = int(current)
                    new = 0
                    if tag == "add":
                        new = orig + value
                    elif tag == "sub":
                        new = orig - value
                    elif tag == "mult":
                        new = orig * value
                    elif tag == "div":
                        # Floor division keeps the result an integer on
                        # both Python 2 and Python 3.
                        new = orig // value
                    self._users[user][var] = new
                except (TypeError, ValueError, ZeroDivisionError):
                    insert = "[ERR: Math couldn't '{}' to value '{}']".format(tag, current)
        elif tag == "get":
            insert = self._users[user].get(data, "undefined")
        else:
            # Unrecognized tag. Hide its brackets behind placeholder bytes
            # so the loop does not match it again.
            insert = "\x00{}\x01".format(match)

        # Variables may hold non-string values (the math tags store ints),
        # so coerce before inserting into the reply text.
        reply = reply.replace("<{}>".format(match), text_type(insert))

    # Restore unrecognized tags.
    reply = reply.replace("\x00", "<").replace("\x01", ">")

    # Streaming code. DEPRECATED!
    if '{!' in reply:
        self._warn("Use of the {!...} tag is deprecated and not supported here.")

    # Topic setter.
    for match in re.findall(RE.topic_tag, reply):
        self._say("Setting user's topic to " + match)
        self._users[user]["topic"] = match
        reply = reply.replace('{{topic={match}}}'.format(match=match), '')

    # Inline redirecter.
    for match in re.findall(RE.redir_tag, reply):
        self._say("Redirect to " + match)
        at = match.strip()
        subreply = self._getreply(user, at, step=(depth + 1))
        reply = reply.replace('{{@{match}}}'.format(match=match), subreply)

    # Object caller.
    reply = reply.replace("{__call__}", "<call>")
    reply = reply.replace("{/__call__}", "</call>")
    for match in re.findall(r'<call>(.+?)</call>', reply):
        parts = re.split(RE.ws, match)
        output = ''
        obj = parts[0]
        args = parts[1:]

        # Do we know this object?
        if obj in self._objlangs:
            # We do, but do we have a handler for that language?
            lang = self._objlangs[obj]
            if lang in self._handlers:
                # We do.
                try:
                    output = self._handlers[lang].call(self, obj, user, args)
                except python.PythonObjectError as e:
                    self._warn(str(e))
                    if not ignore_object_errors:
                        raise ObjectError(str(e))
                    output = RS_ERR_OBJECT
            else:
                if not ignore_object_errors:
                    raise ObjectError(RS_ERR_OBJECT_HANDLER)
                output = RS_ERR_OBJECT_HANDLER
        else:
            if not ignore_object_errors:
                raise ObjectError(RS_ERR_OBJECT_MISSING)
            output = RS_ERR_OBJECT_MISSING

        reply = reply.replace('<call>{match}</call>'.format(match=match), output)

    return reply
2234
2235 - def _string_format(self, msg, method):
2236 """Format a string (upper, lower, formal, sentence).""" 2237 if method == "uppercase": 2238 return msg.upper() 2239 elif method == "lowercase": 2240 return msg.lower() 2241 elif method == "sentence": 2242 return msg.capitalize() 2243 elif method == "formal": 2244 return string.capwords(msg)
2245 2246 ############################################################################ 2247 # Topic inheritance Utility Methods # 2248 ############################################################################ 2249
2250 - def _topic_triggers(self, topic, triglvl, depth=0, inheritance=0, inherited=False):
2251 """Recursively scan a topic and return a list of all triggers.""" 2252 2253 # Break if we're in too deep. 2254 if depth > self._depth: 2255 self._warn("Deep recursion while scanning topic inheritance") 2256 2257 # Important info about the depth vs inheritance params to this function: 2258 # depth increments by 1 each time this function recursively calls itself. 2259 # inheritance increments by 1 only when this topic inherits another 2260 # topic. 2261 # 2262 # This way, '> topic alpha includes beta inherits gamma' will have this 2263 # effect: 2264 # alpha and beta's triggers are combined together into one matching 2265 # pool, and then those triggers have higher matching priority than 2266 # gamma's. 2267 # 2268 # The inherited option is True if this is a recursive call, from a topic 2269 # that inherits other topics. This forces the {inherits} tag to be added 2270 # to the triggers. This only applies when the top topic 'includes' 2271 # another topic. 2272 self._say("\tCollecting trigger list for topic " + topic + "(depth=" 2273 + str(depth) + "; inheritance=" + str(inheritance) + "; " 2274 + "inherited=" + str(inherited) + ")") 2275 2276 # topic: the name of the topic 2277 # triglvl: reference to self._topics or self._thats 2278 # depth: starts at 0 and ++'s with each recursion 2279 2280 # Collect an array of triggers to return. 2281 triggers = [] 2282 2283 # Get those that exist in this topic directly. 2284 inThisTopic = [] 2285 if topic in triglvl: 2286 for trigger in triglvl[topic]: 2287 inThisTopic.append(trigger) 2288 2289 # Does this topic include others? 2290 if topic in self._includes: 2291 # Check every included topic. 2292 for includes in self._includes[topic]: 2293 self._say("\t\tTopic " + topic + " includes " + includes) 2294 triggers.extend(self._topic_triggers(includes, triglvl, (depth + 1), inheritance, True)) 2295 2296 # Does this topic inherit others? 2297 if topic in self._lineage: 2298 # Check every inherited topic. 
2299 for inherits in self._lineage[topic]: 2300 self._say("\t\tTopic " + topic + " inherits " + inherits) 2301 triggers.extend(self._topic_triggers(inherits, triglvl, (depth + 1), (inheritance + 1), False)) 2302 2303 # Collect the triggers for *this* topic. If this topic inherits any 2304 # other topics, it means that this topic's triggers have higher 2305 # priority than those in any inherited topics. Enforce this with an 2306 # {inherits} tag. 2307 if topic in self._lineage or inherited: 2308 for trigger in inThisTopic: 2309 self._say("\t\tPrefixing trigger with {inherits=" + str(inheritance) + "}" + trigger) 2310 triggers.append("{inherits=" + str(inheritance) + "}" + trigger) 2311 else: 2312 triggers.extend(inThisTopic) 2313 2314 return triggers
2315
2316 - def _find_trigger_by_inheritance(self, topic, trig, depth=0):
2317 """Locate the replies for a trigger in an inherited/included topic.""" 2318 2319 # This sub was called because the user matched a trigger from the sorted 2320 # array, but the trigger doesn't belong to their topic, and is instead 2321 # in an inherited or included topic. This is to search for it. 2322 2323 # Prevent recursion. 2324 if depth > self._depth: 2325 self._warn("Deep recursion detected while following an inheritance trail!") 2326 return None 2327 2328 # inheritance is more important than inclusion: triggers in one topic can 2329 # override those in an inherited topic. 2330 if topic in self._lineage: 2331 for inherits in sorted(self._lineage[topic]): 2332 # See if this inherited topic has our trigger. 2333 if trig in self._topics[inherits]: 2334 # Great! 2335 return self._topics[inherits][trig] 2336 else: 2337 # Check what THAT topic inherits from. 2338 match = self._find_trigger_by_inheritance( 2339 inherits, trig, (depth + 1) 2340 ) 2341 if match: 2342 # Found it! 2343 return match 2344 2345 # See if this topic has an "includes" 2346 if topic in self._includes: 2347 for includes in sorted(self._includes[topic]): 2348 # See if this included topic has our trigger. 2349 if trig in self._topics[includes]: 2350 # Great! 2351 return self._topics[includes][trig] 2352 else: 2353 # Check what THAT topic inherits from. 2354 match = self._find_trigger_by_inheritance( 2355 includes, trig, (depth + 1) 2356 ) 2357 if match: 2358 # Found it! 2359 return match 2360 2361 # Don't know what else to do! 2362 return None
2363
2364 - def _get_topic_tree(self, topic, depth=0):
2365 """Given one topic, get the list of all included/inherited topics.""" 2366 2367 # Break if we're in too deep. 2368 if depth > self._depth: 2369 self._warn("Deep recursion while scanning topic trees!") 2370 return [] 2371 2372 # Collect an array of all topics. 2373 topics = [topic] 2374 2375 # Does this topic include others? 2376 if topic in self._includes: 2377 # Try each of these. 2378 for includes in sorted(self._includes[topic]): 2379 topics.extend(self._get_topic_tree(includes, depth + 1)) 2380 2381 # Does this topic inherit others? 2382 if topic in self._lineage: 2383 # Try each of these. 2384 for inherits in sorted(self._lineage[topic]): 2385 topics.extend(self._get_topic_tree(inherits, depth + 1)) 2386 2387 return topics
2388 2389 ############################################################################ 2390 # Miscellaneous Private Methods # 2391 ############################################################################ 2392
2393 - def _is_atomic(self, trigger):
2394 """Determine if a trigger is atomic or not.""" 2395 2396 # Atomic triggers don't contain any wildcards or parenthesis or anything 2397 # of the sort. We don't need to test the full character set, just left 2398 # brackets will do. 2399 special = ['*', '#', '_', '(', '[', '<'] 2400 for char in special: 2401 if char in trigger: 2402 return False 2403 2404 return True
2405
def _word_count(self, trigger, all=False):
    """Count the words in a trigger.

    By default the trigger is split on the ``RE.wilds`` pattern, so
    wildcard characters do not count as words. With ``all`` set, it is
    split on whitespace (``RE.ws``) only. Empty pieces produced by the
    split are ignored.
    """
    splitter = RE.ws if all else RE.wilds
    return sum(1 for piece in re.split(splitter, trigger) if len(piece) > 0)
2420
def _strip_nasties(self, s):
    """Return ``s`` with every match of the ``RE.nasties`` pattern removed,
    making it suitable for ASCII regex matching."""
    return RE.nasties.sub('', s)
2425
def _dump(self):
    """Pretty-print the interpreter's internal data structures to standard
    output, for debugging."""
    printer = pprint.PrettyPrinter(indent=4)

    # Each entry: the heading line(s) to print, then the attribute to dump.
    sections = (
        (("=== Variables ===", "-- Globals --"), "_gvars"),
        (("-- Bot vars --",), "_bvars"),
        (("-- Substitutions --",), "_subs"),
        (("-- Person Substitutions --",), "_person"),
        (("-- Arrays --",), "_arrays"),
        (("=== Topic Structure ===",), "_topics"),
        (("=== %Previous Structure ===",), "_thats"),
        (("=== Includes ===",), "_includes"),
        (("=== Inherits ===",), "_lineage"),
        (("=== Sort Buffer ===",), "_sorted"),
        (("=== Syntax Tree ===",), "_syntax"),
    )

    for headings, attribute in sections:
        for heading in headings:
            print(heading)
        printer.pprint(getattr(self, attribute))
2458
################################################################################
# Exception Classes                                                            #
################################################################################

class RiveScriptError(Exception):
    """Base class for RiveScript exceptions.

    The message is handed to ``Exception`` and is also kept on the
    ``error_message`` attribute.
    """

    def __init__(self, error_message=None):
        self.error_message = error_message
        super(RiveScriptError, self).__init__(error_message)
2469
class NoMatchError(RiveScriptError):
    """Raised when no reply could be matched; carries the ``RS_ERR_MATCH``
    message."""

    def __init__(self):
        super(NoMatchError, self).__init__(error_message=RS_ERR_MATCH)
2475
class NoReplyError(RiveScriptError):
    """Raised when no reply could be found; carries the ``RS_ERR_REPLY``
    message."""

    def __init__(self):
        super(NoReplyError, self).__init__(error_message=RS_ERR_REPLY)
2481
class ObjectError(RiveScriptError):
    """Raised when executing a Python object fails.

    The message defaults to ``RS_ERR_OBJECT``; a more specific message may
    be supplied instead.
    """

    def __init__(self, error_message=RS_ERR_OBJECT):
        super(ObjectError, self).__init__(error_message=error_message)
2487
class DeepRecursionError(RiveScriptError):
    """Prevented an infinite loop / deep recursion, unable to retrieve a reply for this message"""

    def __init__(self):
        # Mirrors the sibling exceptions, which pass their RS_ERR_* message
        # to the base class.
        # NOTE(review): RS_ERR_DEEP_RECURSION is assumed to be defined next to
        # the other RS_ERR_* constants of this module -- confirm the name.
        super(DeepRecursionError, self).__init__(RS_ERR_DEEP_RECURSION)
2493
class NoDefaultRandomTopicError(Exception):
    """Critical error: the default topic 'random' could not be found."""
2498
class RepliesNotSortedError(Exception):
    """Critical error: sort_replies() was not called after the RiveScript
    documents were loaded."""
################################################################################
# Interactive Mode                                                             #
################################################################################

# When this module is executed directly as a script, hand control over to the
# interactive mode entry point.
if __name__ == "__main__":
    # NOTE(review): `interactive` is imported here as a top-level module,
    # unlike the package-relative imports at the top of this file -- confirm
    # that it resolves in the way this script is expected to be launched.
    from interactive import interactive_mode
    interactive_mode()

# vim:expandtab