evalfitness

1 """ 2 evalfitness 3 =========== 4 Contains methods to evaluate the fitness of a tree. 5 6 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 7 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 8 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 9 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 10 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 11 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 12 THE SOFTWARE. 13 14 15 @author: by Mehdi Khoury 16 @version: 1.20 17 @copyright: (c) 2009 Mehdi Khoury under the mit license 18 http://www.opensource.org/licenses/mit-license.html 19 @contact: mehdi.khoury at gmail.com 20 21 """ 22 23 from treeutil import PostOrder_Search 24 from collections import deque 25 import psyco 26 import random 27 import math 28 import settings 29 import buildtree 30 import timeit 31 import settings 32 import fitnessutil 33 34 psyco.profile() 35 36

37 -def EvalTreeForOneInputSet(myTree, input_set_ref):

38 """ 39 Function: EvalTreeForOneInputSet 40 ================================= 41 Function used to evaluate a tree by pluggin in 42 one set of values (one learning example) 43 44 @param myTree: the nested list representing a tree 45 @param input_set_ref: the set of values to plug into the tree 46 @return: the fitness of the tree for this set of values 47 """ 48 resultStack=deque() 49 adfDict={} 50 # examine every node in the tree in pre-order taversal 51 # (leaves first and then branches) 52 for elem in PostOrder_Search(myTree): 53 # if the node is a leaf (variable, or constant), 54 # add its value to the result stack 55 if elem[0]==3: 56 resultStack.append(settings.terminals[elem[2]][input_set_ref]) 57 elif elem[0]==4: 58 resultStack.append(settings.terminals[elem[2]]) 59 # if the node is a function with n arguments, pop the result stack 60 # n times. Get these popped elemnts as arguments for the function, 61 # and replace the top of the stack by the result of the function 62 elif elem[0]==1: 63 nb= elem[1] 64 name = elem[2] 65 tempResult=deque() 66 for i in xrange(0,nb): 67 try: 68 tempResult.append(resultStack.pop()) 69 except: 70 print myTree 71 print elem 72 print resultStack 73 exit 74 resultStack.extend(map(settings.functions[name],[tempResult])) 75 # if the node is an ADF branch, add the top of the stack to 76 # the ADF dictionary 77 elif elem[0]==2: 78 adfDict[elem[2]]= resultStack[-1] 79 # if the node is an ADF terminal, add the corresponding ADF 80 # branch value available in the dictionary to the result stack 81 elif elem[0]==5: 82 resultStack.append(adfDict[elem[2]]) 83 # if the node is a root, apply the root function to all 84 # direct children and return the result. 85 elif elem[0]==0: 86 name = elem[2] 87 tempResult=[] 88 while resultStack: 89 tempResult.append(resultStack.popleft()) 90 resultStack.extend(map(settings.functions[name],[tempResult])) 91 return resultStack[0]

92 93 94

95 -def EvalTreeForOneListInputSet(myTree):

96 """ 97 Function: EvalTreeForOneInputSet 98 ================================= 99 Function used to evaluate a tree by pluggin in 100 one list of values (one list of data points) 101 102 @param myTree: the nested list representing a tree 103 @return: the fitness of the tree for this set of values 104 """ 105 resultStack=deque() 106 adfDict={} 107 # examine every node in the tree in pre-order taversal 108 # (leaves first and then branches) 109 for elem in PostOrder_Search(myTree): 110 # if the node is a leaf (variable, or constant), 111 # add its value to the result stack 112 if elem[0]==3: 113 resultStack.append(settings.terminals[elem[2]]) 114 115 elif elem[0]==4: 116 resultStack.append(settings.terminals[elem[2]]) 117 # if the node is a function with n arguments, pop the result stack 118 # n times. Get these popped elemnts as arguments for the function, 119 # and replace the top of the stack by the result of the function 120 elif elem[0]==1: 121 nb= elem[1] 122 name = elem[2] 123 tempResult=deque() 124 for i in xrange(0,nb): 125 tempResult.append(resultStack.pop()) 126 resultStack.extend(map(settings.functions[name],[tempResult])) 127 # if the node is an ADF branch, add the top of the stack to 128 # the ADF dictionary 129 elif elem[0]==2: 130 adfDict[elem[2]]= resultStack[-1] 131 # if the node is an ADF terminal, add the corresponding ADF 132 # branch value available in the dictionary to the result stack 133 elif elem[0]==5: 134 resultStack.append(adfDict[elem[2]]) 135 # if the node is a root, apply the root function to all 136 # direct children and return the result. 137 elif elem[0]==0: 138 name = elem[2] 139 tempResult=[] 140 while resultStack: 141 tempResult.append(resultStack.popleft()) 142 resultStack.extend(map(settings.functions[name],[tempResult])) 143 return resultStack[0]

144

145 -def EvalTreeForOneListInputSet_tutorial8(myTree):

146 """ 147 Function: EvalTreeForOneInputSet2 148 ================================== 149 Function used to evaluate a tree by pluggin in 150 one list of values (one list of data points) 151 152 @param myTree: the nested list representing a tree 153 @return: the fitness of the tree for this set of values 154 """ 155 resultStack=deque() 156 adfDict={} 157 # examine every node in the tree in pre-order taversal 158 # (leaves first and then branches) 159 for elem in PostOrder_Search(myTree): 160 # if the node is a leaf (variable, or constant), 161 # add its value to the result stack 162 if elem[0]==3: 163 resultStack.append(settings.terminals[elem[2]]) 164 elif elem[0]==4: 165 resultStack.append(settings.terminals[elem[2]]) 166 # if the node is a function with n arguments, pop the result stack 167 # n times. Get these popped elemnts as arguments for the function, 168 # and replace the top of the stack by the result of the function 169 elif elem[0]==1: 170 nb= elem[1] 171 name = elem[2] 172 tempResult=deque() 173 for i in xrange(nb): 174 tempResult.append(resultStack.pop()) 175 resultStack.extend(map(settings.functions[name],[tempResult])) 176 177 # if the node is an ADF branch, add the top of the stack to 178 # the ADF dictionary 179 elif elem[0]==2: 180 adfDict[elem[2]]= resultStack[-1] 181 # if the node is an ADF terminal, add the corresponding ADF 182 # branch value available in the dictionary to the result stack 183 elif elem[0]==5: 184 resultStack.append(adfDict[elem[2]]) 185 # if the node is a root, apply the root function to all 186 # direct children and return the result. 187 elif elem[0]==0: 188 name = elem[2] 189 tempResult=[] 190 while resultStack: 191 tempResult.append(resultStack.popleft()) 192 resultStack.extend(map(settings.functions[name],[tempResult])) 193 return resultStack[0]

194 195

196 -def EvalTreeForOneListInputSet_tutorial9(myTree):

197 """ 198 Function: EvalTreeForOneInputSet2 199 ================================== 200 Function used to evaluate a tree by pluggin in 201 one list of values (one list of data points) 202 203 @param myTree: the nested list representing a tree 204 @return: the fitness of the tree for this set of values 205 """ 206 resultStack=deque() 207 adfDict={} 208 # examine every node in the tree in pre-order taversal 209 # (leaves first and then branches) 210 for elem in PostOrder_Search(myTree): 211 # if the node is a leaf (variable, or constant), 212 # add its value to the result stack 213 if elem[0]==3: 214 resultStack.append(settings.terminals[elem[2]]) 215 elif elem[0]==4: 216 resultStack.append(settings.terminals[elem[2]]) 217 # if the node is a function with n arguments, pop the result stack 218 # n times. Get these popped elemnts as arguments for the function, 219 # and replace the top of the stack by the result of the function 220 elif elem[0]==1: 221 nb= elem[1] 222 name = elem[2] 223 tempResult=deque() 224 for i in xrange(nb): 225 tempResult.append(resultStack.pop()) 226 resultStack.extend(map(settings.functions[name],[tempResult])) 227 228 # if the node is an ADF branch, add the top of the stack to 229 # the ADF dictionary 230 elif elem[0]==2: 231 adfDict[elem[2]]= resultStack[-1] 232 # if the node is an ADF terminal, add the corresponding ADF 233 # branch value available in the dictionary to the result stack 234 elif elem[0]==5: 235 resultStack.append(adfDict[elem[2]]) 236 # if the node is a root, apply the root function to all 237 # direct children and return the result. 238 elif elem[0]==0: 239 name = elem[2] 240 tempResult=[] 241 while resultStack: 242 tempResult.append(resultStack.popleft()) 243 resultStack.extend(map(settings.functions[name],[tempResult])) 244 return resultStack[0]

245 246

247 -def EvalTreeForAllInputSets(myTree, input_sets):

248 """ 249 Function: EvalTreeForAllInputSets 250 ================================== 251 Function used to evaluate a tree by pluggin in 252 several sets of values 253 254 @param myTree: the nested list representing a tree 255 @param input_sets: the set of values to plug into the tree 256 @return: the fitnesses of the tree over several sets of values 257 """ 258 results=[] 259 val=len(input_sets) 260 for elem in xrange(val): 261 results.append(EvalTreeForOneInputSet(myTree, elem)) 262 return results

263 # compute global fitness of an individual across all different examples 264 265 266

267 -def FinalFitness(intermediate_outputs):

268 """ 269 Function: FinalFitness 270 ======================= 271 Compute global fitness of an individual. Intended when wanting to refine 272 the fitness score. 273 274 @param intermediate_outputs: the fitnesses of the tree over several sets of 275 values 276 @return: global fitness 277 """ 278 final_output=0 279 # each element represents one different sample or set of input data 280 # the size of each represents the number of examples 281 #each sub-element represents the value(s) obtained at the top of a three for one input 282 #In this particular case, we simply add the difference of all results with an ideal solution 283 284 # the ideal solution is : [adf1 = x+y adf2 = add1*(y-x)] 285 # build a corresponding list of two-elements sub lists 286 # then evaluate the sum of the difference with our built models 287 goal_function=[] 288 for nb in xrange(settings.nb_eval): 289 #for nb in xrange(settings.nb_ex): 290 291 ideal_results=settings.ideal_results[nb] 292 obtained_results=intermediate_outputs[nb] 293 294 295 for el in obtained_results: 296 try: 297 if math.isinf(el): 298 return el 299 except: 300 return float('inf') 301 # sum the absolute values of the differences over one example 302 diff= sum( [math.fabs(ideal_results[x]-obtained_results[x]) for x in xrange(len(ideal_results))]) 303 final_output= final_output+diff 304 return final_output

305

306 -def FinalFitness2(intermediate_outputs):

307 """ 308 Function: FinalFitness2 309 ======================== 310 Compute global fitness of an individual. Intended when wanting to refine 311 the fitness score. 312 313 @param intermediate_outputs: the fitnesses of the tree over several sets of 314 values 315 @return: global fitness 316 """ 317 final_output=0 318 # each element represents one different sample or set of input data 319 # the size of each represents the number of examples 320 #each sub-element represents the value(s) obtained at the top of a three for one input 321 #In this particular case, we simply add the difference of all results with an ideal solution 322 323 # the ideal solution is : [adf1 = x+y adf2 = add1*(y-x)] 324 # build a corresponding list of two-elements sub lists 325 # then evaluate the sum of the difference with our built models 326 ideal_results=settings.ideal_results 327 obtained_results=intermediate_outputs 328 for res in xrange(len(settings.ideal_results)): 329 for el in obtained_results[res]: 330 try: 331 if math.isinf(el): 332 return el 333 except: 334 return float('inf') 335 # sum the absolute values of the differences over one example 336 diff= sum( [math.fabs(ideal_results[res][x]-obtained_results[res][x]) for x in xrange(settings.nb_ex)]) 337 final_output= final_output+diff 338 return final_output

339

340 -def FinalFitness3(intermediate_outputs):

341 """ 342 Function: FinalFitness3 343 ======================== 344 Compute global fitness of an individual. Intended when wanting to refine 345 the fitness score. 346 347 @param intermediate_outputs: the fitnesses of the tree over several sets of 348 values 349 @return: global fitness 350 """ 351 final_output=0 352 # each element represents one different sample or set of input data 353 # the size of each represents the number of examples 354 #each sub-element represents the value(s) obtained at the top of a three for one input 355 #In this particular case, we simply add the difference of all results with an ideal solution 356 357 # the ideal solution is : [adf1 = x+y adf2 = add1*(y-x)] 358 # build a corresponding list of two-elements sub lists 359 # then evaluate the sum of the difference with our built models 360 goal_function=[] 361 for nb in xrange(settings.nb_eval): 362 #for nb in xrange(settings.nb_ex): 363 364 ideal_results=settings.ideal_results[nb] 365 obtained_results=intermediate_outputs[nb] 366 #print ideal_results 367 #print obtained_results 368 for el in obtained_results: 369 try: 370 if math.isinf(el): 371 return el 372 except: 373 return float('inf') 374 # sum the absolute values of the differences over one example 375 # here we use a very very puzzling python list comprehension... This deserve a bit of explanation. 376 # In general, the expression "T if C is true, or F if C is false" can be written as (F, T)[bool(C)]. 377 # This single line could be replaced by a simpler but slower expression of the type: 378 #z=[] 379 #for i in range(10): 380 # if C: 381 # z.append(T) 382 # else: 383 # z.append(F) 384 # In our case, if the first element of obtained_resultsis is True (the result of the if statement) 385 # then use the result produce by the second branch, otherwise use the result produced by the third 386 # branch. 387 # As far as we are concerned, list comprehension are faster + compact + more memory efficient. 388 # so for this crucial fitness calculation bit, I chose this solution... 389 # May the deities of the Python programming pantheon forgive me (Sorry Guido...). 390 diff= sum( [(math.fabs(ideal_results[x]-obtained_results[1]) ,math.fabs(ideal_results[x]-obtained_results[2]) )[obtained_results[0]] for x in xrange(len(ideal_results))]) 391 final_output= final_output+diff 392 return final_output

393 394

395 -def FinalFitness4(intermediate_outputs):

396 """ 397 Function: FinalFitness3 398 ======================== 399 Compute global fitness of an individual. Intended when wanting to refine 400 the fitness score. 401 402 @param intermediate_outputs: the fitnesses of the tree over several sets of 403 values 404 @return: global fitness 405 """ 406 final_output=0 407 # each element represents one different sample or set of input data 408 # the size of each represents the number of examples 409 #each sub-element represents the value(s) obtained at the top of a three for one input 410 #In this particular case, we simply add the difference of all results with an ideal solution 411 412 # the ideal solution is : [adf1 = x+y adf2 = add1*(y-x)] 413 # build a corresponding list of two-elements sub lists 414 # then evaluate the sum of the difference with our built models 415 goal_function=[] 416 417 for nb in xrange(len(intermediate_outputs)): 418 for el in intermediate_outputs[nb]: 419 for el2 in el: 420 try: 421 if isinstance(el2, bool): 422 pass 423 elif math.isinf(el2): 424 return el2 425 except: 426 return float('inf') 427 # sum the absolute values of the differences over one example 428 # here we use a very very puzzling python list comprehension... This deserve a bit of explanation. 429 # In general, the expression "T if C is true, or F if C is false" can be written as (F, T)[bool(C)]. 430 # This single line could be replaced by a simpler but slower expression of the type: 431 #z=[] 432 #for i in range(10): 433 # if C: 434 # z.append(T) 435 # else: 436 # z.append(F) 437 # In our case, if the first element of obtained_resultsis is True (the result of the if statement) 438 # then use the result produce by the second branch, otherwise use the result produced by the third 439 # branch. 440 # As far as we are concerned, list comprehension are faster + compact + more memory efficient. 441 # so for this crucial fitness calculation bit, I chose this solution... 442 # May the deities of the Python programming pantheon forgive me (Sorry Guido...). 443 final_output= sum([(math.fabs(settings.ideal_results[x][y]-intermediate_outputs[2][x][y]),math.fabs(settings.ideal_results[x][y]-intermediate_outputs[1][x][y])) [intermediate_outputs[0][x][y]] for x in xrange(len(intermediate_outputs[1])) for y in xrange(len(intermediate_outputs[1][x]))]) 444 445 return final_output

446 447

448 -def FinalFitness_tutorial8(intermediate_outputs):

449 """ 450 Function: FinalFitness3 451 ======================== 452 Compute global fitness of an individual. Intended when wanting to refine 453 the fitness score. 454 455 @param intermediate_outputs: the fitnesses of the tree over several sets of 456 values 457 @return: global fitness 458 """ 459 final_output=0 460 goal_function=[] 461 462 #for i in xrange( len(settings.ideal_results)): 463 # for j in xrange( len(settings.ideal_results[i])): 464 # print settings.ideal_results[i][j] 465 #print settings.inputdata 466 #print intermediate_outputs[0] 467 #print intermediate_outputs[1] 468 #print intermediate_outputs[2] 469 try: 470 result=fitnessutil.ReplaceUsingBinaryMask(settings.inputdata,intermediate_outputs[0],intermediate_outputs[1],intermediate_outputs[2] ) 471 # expand the compact array 472 uncompressed_result= [fitnessutil.UncompressList(result[x][0]) for x in xrange(len(result))] 473 uncompressed_ideal_results= [fitnessutil.UncompressList(settings.ideal_results[x]) for x in xrange(len(result))] 474 475 476 final_output= sum([(0,1)[uncompressed_result[x][y]!=uncompressed_ideal_results[x][y]] for x in xrange(len(uncompressed_result)) for y in xrange(len(uncompressed_result[x]))]) 477 except: 478 final_output=float('inf') 479 #print result[0][0] 480 481 #final_output= sum([(math.fabs(settings.ideal_results[x][y]-intermediate_outputs[2][x][y]),math.fabs(settings.ideal_results[x][y]-intermediate_outputs[1][x][y])) [intermediate_outputs[0][x][y]] for x in xrange(len(intermediate_outputs[1])) for y in xrange(len(intermediate_outputs[1][x]))]) 482 #for i in xrange( len(result)): 483 # for j in xrange( len(result[i])): 484 # print result[i][j] 485 486 return final_output

487 488

489 -def FinalFitness_tutorial9(intermediate_outputs):

490 """ 491 Function: FinalFitness3 492 ======================== 493 Compute global fitness of an individual. Intended when wanting to refine 494 the fitness score. 495 496 @param intermediate_outputs: the fitnesses of the tree over several sets of 497 values 498 @return: global fitness 499 """ 500 final_output=0 501 goal_function=[] 502 503 #for i in xrange( len(settings.ideal_results)): 504 # for j in xrange( len(settings.ideal_results[i])): 505 # print settings.ideal_results[i][j] 506 #print settings.inputdata 507 #print intermediate_outputs 508 #print intermediate_outputs[1] 509 #print intermediate_outputs[2] 510 511 try: 512 #print list(xrange(1,(len(intermediate_outputs)/3)+1)) 513 temp_input=settings.inputdata 514 #print temp_input 515 for i in xrange(1,(len(intermediate_outputs)/3)+1): 516 temp_result=fitnessutil.ReplaceUsingBinaryMask(temp_input,intermediate_outputs[(i*3)-1],intermediate_outputs[(i*3)-2],intermediate_outputs[(i*3)-3] ) 517 temp_input=temp_result 518 #print temp_input 519 520 # expand the compact array 521 uncompressed_result= [fitnessutil.UncompressList(temp_result[x][0]) for x in xrange(len(temp_result))] 522 uncompressed_ideal_results= [fitnessutil.UncompressList(settings.ideal_results[x]) for x in xrange(len(temp_result))] 523 524 525 final_output= sum([(0,1)[uncompressed_result[x][y]!=uncompressed_ideal_results[x][y]] for x in xrange(len(uncompressed_result)) for y in xrange(len(uncompressed_result[x]))]) 526 except: 527 final_output=float('inf') 528 529 #print result[0][0] 530 531 #final_output= sum([(math.fabs(settings.ideal_results[x][y]-intermediate_outputs[2][x][y]),math.fabs(settings.ideal_results[x][y]-intermediate_outputs[1][x][y])) [intermediate_outputs[0][x][y]] for x in xrange(len(intermediate_outputs[1])) for y in xrange(len(intermediate_outputs[1][x]))]) 532 #for i in xrange( len(result)): 533 # for j in xrange( len(result[i])): 534 # print result[i][j] 535 536 return final_output

537 538 539 if __name__ == '__main__': 540 541 542 #for i in xrange(2000): 543 #a=buildtree.buildTree().AddFullNode((0,3,'root'),0,2,8) 544 #a=buildtree.buildTree().AddFullNode((0,3,'root'),0,8) 545 a=buildtree.buildTree().AddFullNode((0,2,'root'),0,8) 546 #print a 547 #print i 548 549 #print r 550 551 #print FinalFitness_tutorial9(EvalTreeForOneListInputSet_tutorial9(a)) 552 553 #t1 = timeit.Timer('evalfitness.EvalTreeForOneListInputSet_tutorial9(a)' , 'from __main__ import a ;import evalfitness') 554 t2 = timeit.Timer('evalfitness.FinalFitness_tutorial9(evalfitness.EvalTreeForOneListInputSet_tutorial9(a))' , 'from __main__ import a ;import evalfitness') 555 #print t1.timeit(100) 556 print t2.timeit(1000) 557 #print FinalFitness_tutorial8(EvalTreeForOneListInputSet_tutorial8(a)) 558 559 #print FinalFitness3(EvalTreeForAllInputSets(a,xrange(20))) 560 #print FinalFitness4(EvalTreeForOneListInputSet(a)) 561 #print EvalTreeForOneInputSet(a,0) 562 563 #i=EvalTreeForOneListInputSet2(a) 564 #print i 565 #print FinalFitness4(i) 566 #t1 = timeit.Timer('evalfitness.EvalTreeForOneInputSet(a,0)' , 'from __main__ import a ;import evalfitness') 567 #t2 = timeit.Timer('evalfitness.EvalTreeForAllInputSets(a,xrange(1000))' , 'from __main__ import a ;import evalfitness') 568 #t3 = timeit.Timer('i=evalfitness.EvalTreeForAllInputSets(a,xrange(1000)); evalfitness.FinalFitness(i)' , 'from __main__ import a ;import evalfitness') 569 570 #print t.timeit(100) 571 #print t1.repeat(1,1000) 572 #print t2.repeat(1,1000) 573 #print t3.repeat(1,1000) 574 #print sum(t1.repeat(1,1000))/1000 575 #print sum(t2.repeat(1,1000))/1000 576 #print sum(t3.repeat(1,1000))/1000 577

Source Code for Module evalfitness