tools/modelselector/modelselector.py

   1 # Copyright 2022 Linux Foundation.
   2 # srao@linuxfoundation.org
   3 #
   4 # Licensed under the Apache License, Version 2.0 (the "License");
   5 # you may not use this file except in compliance with the License.
   6 # You may obtain a copy of the License at
   7 #
   8 #   http://www.apache.org/licenses/LICENSE-2.0
   9 #
  10 # Unless required by applicable law or agreed to in writing, software
  11 # distributed under the License is distributed on an "AS IS" BASIS,
  12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13 # See the License for the specific language governing permissions and
  14 # limitations under the License.
  15
  16 """
Tool to suggest which ML approach is more applicable for
a particular dataset and use case.
  19 TODO:
  20 1. Minimize code.
  21 2. Add Informative data to the user.
  22 3. Check for Size Entry - 1G/K ..
  23 """
  24
  25 from __future__ import print_function
  26 import signal
  27 import sys
  28 from pypsi import wizard as wiz
  29 from pypsi.shell import Shell
  30
  31 # pylint: disable=line-too-long,too-few-public-methods,too-many-instance-attributes, too-many-nested-blocks, too-many-return-statements, too-many-branches
  32
  33 class Bcolors:
  34     """
  35     For Coloring
  36     """
  37     HEADER = '\033[95m'
  38     OKBLUE = '\033[94m'
  39     OKGREEN = '\033[92m'
  40     WARNING = '\033[93m'
  41     FAIL = '\033[91m'
  42     ENDC = '\033[0m'
  43     BOLD = '\033[1m'
  44     UNDERLINE = '\033[4m'
  45
  46 class AlgoSelectorWizard():
  47     """
  48     Class to create wizards
  49     """
  50     def __init__(self):
  51         """
  52         Perform Initialization.
  53         """
  54         self.shell = Shell()
  55         # Set of all values from the user
  56         self.main_values = {}
  57         self.main_l1_values = {}
  58         self.main_l2a_values = {}
  59         self.main_l2b_values = {}
  60         self.main_l3_values = {}
  61         self.main_l4_values = {}
  62         self.unsup_values = {}
  63         self.ri_values = {}
  64         self.gen_values = {}
  65         self.gen_choice_values = {}
  66         self.gen_metrics_values = {}
  67         self.gen_data_main_values = {}
  68         self.gen_data_text_values = {}
  69         self.gen_data_features_values = {}
  70         self.gen_data_signal_values = {}
  71         self.gen_about_data_basic_values = {}
  72         self.gen_about_data_adv_values = {}
  73         self.gen_about_data_output_values = {}
  74         # Set of Wizards.
  75         self.wiz_main = None
  76         self.wiz_main_l1 = None
  77         self.wiz_main_l2_a = None
  78         self.wiz_main_l2_b = None
  79         self.wiz_main_l3 = None
  80         self.wiz_main_l4 = None
  81         self.wiz_generic = None
  82         self.wiz_generic_choice = None
  83         self.wiz_geneirc_metric = None
  84         self.wiz_generic_data_main = None
  85         self.wiz_generic_data_signal = None
  86         self.wiz_generic_data_features = None
  87         self.wiz_generic_data_text = None
  88         self.wiz_generic_data_basic = None
  89         self.wiz_generic_data_adv = None
  90         self.wiz_generic_data_output = None
  91         self.wiz_unsupervised = None
  92         self.wiz_reinforcement = None
  93         # Some Inferences
  94         self.ml_needed = False
  95         self.supervised = False
  96         self.unsupervised = False
  97         self.reinforcement = False
  98         self.data_size = 'high'
  99         self.interpretability = False
 100         self.faster = False
 101         self.ftod_ratio = 'low'
 102         self.reproducibility = False
 103
 104
 105     ############# All the Wizards ##################################
 106
 107     ### GENERIC Wizards - Need for ML ##############################
 108     def main_wizard_l1(self):
 109         """
 110         The Main Wizard L1
 111         """
 112         self.wiz_main_l1 = wiz.PromptWizard(
 113             name=Bcolors.OKBLUE+"Do you Need ML - Data Availability"+Bcolors.ENDC,
 114             description="",
 115             steps=(
 116                 # The list of input prompts to ask the user.
 117                 wiz.WizardStep(
 118                     # ID where the value will be stored
 119                     id="data_availability",
 120                     # Display name
 121                     name=Bcolors.HEADER+"Do you have access to data about different situations, or that describes a lot of examples of situations"+Bcolors.ENDC,
 122                     # Help message
 123                     help="Y/N - Yes/No",
 124                     validators=(wiz.required_validator, wiz.boolean_validator),
 125                     default='Y',
 126                 ),
 127             )
 128         )
 129
 130     def main_wizard_l2_a(self):
 131         """
 132         The Main Wizard L2-A
 133         """
 134         self.wiz_main_l2_a = wiz.PromptWizard(
 135             name=Bcolors.OKBLUE+"Do you Need ML - Data Creation"+Bcolors.ENDC,
 136             description="",
 137             steps=(
 138                 # The list of input prompts to ask the user.
 139                 wiz.WizardStep(
 140                     # ID where the value will be stored
 141                     id="data_creativity",
 142                     # Display name
 143                     name=Bcolors.HEADER+"Will a system be able to gather a lot of data by trying sequences of actions in many different situations and seeing the results"+Bcolors.ENDC,
 144                     # Help message
 145                     help="Y/N - Yes/No",
 146                     validators=(wiz.required_validator, wiz.boolean_validator),
 147                     default='Y',
 148                 ),
 149             )
 150         )
 151
 152     def main_wizard_l2_b(self):
 153         """
 154         The Main Wizard L2-B
 155         """
 156         label = """ One or more meaningful and informative 'tag' to provide context so that a machine learning model can learn from it. For example, labels might indicate whether a photo contains a bird or car, which words were uttered in an audio recording, or if an x-ray contains a tumor. Data labeling is required for a variety of use cases including computer vision, natural language processing, and speech recognition."""
 157         self.wiz_main_l2_b = wiz.PromptWizard(
 158             name=Bcolors.OKBLUE+"Do you Need ML - Data Programmability"+Bcolors.ENDC,
 159             description="",
 160             steps=(
 161                 # The list of input prompts to ask the user.
 162                 wiz.WizardStep(
 163                     # ID where the value will be stored
 164                     id="data_label",
 165                     # Display name
 166                     name=Bcolors.HEADER+" Do you have Labelled data? (Type Y/N - Yes/No). Type help for description of label. "+Bcolors.ENDC,
 167                     # Help message
 168                     help=label,
 169                     validators=(wiz.required_validator, wiz.boolean_validator),
 170                     default='Y',
 171                 ),
 172                 wiz.WizardStep(
 173                     # ID where the value will be stored
 174                     id="data_programmability",
 175                     # Display name
 176                     name=Bcolors.HEADER+"Can a program or set of rules decide what actions to take based on the data you have about the situations"+Bcolors.ENDC,
 177                     # Help message
 178                     help="Y/N - Yes/No",
 179                     validators=(wiz.required_validator, wiz.boolean_validator),
 180                     default='N',
 181                 ),
 182             )
 183         )
 184
 185
 186     def main_wizard_l3(self):
 187         """
 188         The Main Wizard L3
 189         """
 190         self.wiz_main_l3 = wiz.PromptWizard(
 191             name=Bcolors.OKBLUE+"Do you Need ML - Data Knowledge"+Bcolors.ENDC,
 192             description="",
 193             steps=(
 194                 # The list of input prompts to ask the user.
 195                 wiz.WizardStep(
 196                     # ID where the value will be stored
 197                     id="data_knowledge",
 198                     # Display name
 199                     name=Bcolors.HEADER+"Could a knowledgeable human decide what actions to take based on the data you have about the situations"+Bcolors.ENDC,
 200                     # Help message
 201                     help="Y/N - Yes/No",
 202                     validators=(wiz.required_validator, wiz.boolean_validator),
 203                     default='Y',
 204                 ),
 205             )
 206         )
 207
 208     def main_wizard_l4(self):
 209         """
 210         The Main Wizard - L4
 211         """
 212         self.wiz_main_l4 = wiz.PromptWizard(
 213             name=Bcolors.OKBLUE+"Do you Need ML - Data Pattern"+Bcolors.ENDC,
 214             description="",
 215             steps=(
 216                 # The list of input prompts to ask the user.
 217                 wiz.WizardStep(
 218                     # ID where the value will be stored
 219                     id="data_pattern",
 220                     # Display name
 221                     name=Bcolors.HEADER+"Could there be patterns in these situations that the humans haven't recognized before"+Bcolors.ENDC,
 222                     # Help message
 223                     help="Y/N - Yes/No.",
 224                     validators=(wiz.required_validator, wiz.boolean_validator),
 225                     default='Y'
 226                 ),
 227             )
 228         )
 229     ### GENERIC Wizards - GOAL, METRICS, DATA ##############################
 230     def gen_choice_wizard(self):
 231         """
 232         Generic Wizard - Goal, metrics, data
 233         """
 234         self.wiz_generic_choice = wiz.PromptWizard(
 235             name=Bcolors.OKBLUE+"Understanding Goal, and Preferences"+Bcolors.ENDC,
 236             description="",
 237             steps=(
 238                 # The list of input prompts to ask the user.
 239                 wiz.WizardStep(
 240                     # ID where the value will be stored
 241                     id="data_goal",
 242                     # Display name
 243                     name=Bcolors.HEADER+" What is your goal with the data? Predict, Describe or Explore"+Bcolors.ENDC,
 244                     # Help message
 245                     help="Enter one of Predict/Describe/Explore",
 246                     validators=(wiz.required_validator, wiz.choice_validator(['Predict',
 247                                                                               'predict',
 248                                                                               'Describe',
 249                                                                               'describe',
 250                                                                               'Explore',
 251                                                                               'explore'])),
 252                     default='Explore'
 253                 ),
 254                 wiz.WizardStep(
 255                     # ID where the value will be stored
 256                     id="data_metrics_pref",
 257                     # Display name
 258                     name=Bcolors.HEADER+" Do you know which metrics (speed, accuracy, etc.) are more important for you? "+Bcolors.ENDC,
 259                     # Help message
 260                     help="Y/N - Yes/No",
 261                     validators=(wiz.required_validator, wiz.boolean_validator),
 262                     default='Y'
 263                 ),
 264                 wiz.WizardStep(
 265                     # ID where the value will be stored
 266                     id="data_main",
 267                     # Display name
 268                     name=Bcolors.HEADER+" Do you know about the input data type (If its signal/features/text)  ?  "+Bcolors.ENDC,
 269                     # Help message
 270                     help="Y/N - Yes/No",
 271                     validators=(wiz.required_validator, wiz.boolean_validator),
 272                     default='Y'
 273                 ),
 274                 wiz.WizardStep(
 275                     # ID where the value will be stored
 276                     id="data_databasic_pref",
 277                     # Display name
 278                     name=Bcolors.HEADER+" Do you have basic information (size, count, etc.) about the input data? "+Bcolors.ENDC,
 279                     # Help message
 280                     help="Y/N - Yes/No",
 281                     validators=(wiz.required_validator, wiz.boolean_validator),
 282                     default='Y'
 283                 ),
 284                 wiz.WizardStep(
 285                     # ID where the value will be stored
 286                     id="data_dataadv_pref",
 287                     # Display name
 288                     name=Bcolors.HEADER+" Do you have advanced information (distribution, relation, independency, etc.) about the input data? "+Bcolors.ENDC,
 289                     # Help message
 290                     help="Y/N - Yes/No",
 291                     validators=(wiz.required_validator, wiz.boolean_validator),
 292                     default='Y'
 293                 ),
 294                 wiz.WizardStep(
 295                     # ID where the value will be stored
 296                     id="data_dataoutput_pref",
 297                     # Display name
 298                     name=Bcolors.HEADER+" Do you have basic information (size, count, etc.) about the output? "+Bcolors.ENDC,
 299                     # Help message
 300                     help="Y/N - Yes/No",
 301                     validators=(wiz.required_validator, wiz.boolean_validator),
 302                     default='Y'
 303                 ),
 304             )
 305         )
 306
 307     def gen_metrics_wizard(self):
 308         """
 309         Generic Wizard - Goal, metrics, data
 310         """
 311         self.wiz_generic_metrics = wiz.PromptWizard(
 312             name=Bcolors.OKBLUE+"Understanding Goal, Metrics, Data and Output Type"+Bcolors.ENDC,
 313             description="",
 314             steps=(
 315                 # The list of input prompts to ask the user.
 316                 wiz.WizardStep(
 317                     # ID where the value will be stored
 318                     id="metric_accuracy",
 319                     # Display name
 320                     name=Bcolors.HEADER+" How important the metric 'Accuracy' is for you? 1-5: 1- Least important 5- Most Important"+Bcolors.ENDC,
 321                     # Help message
 322                     help="Enter 1-5: 1 being least important, and 5 being most important",
 323                     validators=(wiz.required_validator, wiz.int_validator(1, 5)),
 324                     default='1'
 325                 ),
 326                 wiz.WizardStep(
 327                     # ID where the value will be stored
 328                     id="metric_speed",
 329                     # Display name
 330                     name=Bcolors.HEADER+" How important the metric 'Speed' is for you? 1-5: 1- Least important 5- Most Important"+Bcolors.ENDC,
 331                     # Help message
 332                     help="Enter 1-5: 1 being least important, and 5 being most important",
 333                     validators=(wiz.required_validator, wiz.int_validator(1, 5)),
 334                     default='1'
 335                 ),
 336                 wiz.WizardStep(
 337                     # ID where the value will be stored
 338                     id="metric_interpretability",
 339                     # Display name
 340                     name=Bcolors.HEADER+" How important the metric 'Interpretability' is for you? 1-5: 1- Least important 5- Most Important"+Bcolors.ENDC,
 341                     # Help message
 342                     help="Enter 1-5: 1 being least important, and 5 being most important",
 343                     validators=(wiz.required_validator, wiz.int_validator(1, 5)),
 344                     default='1'
 345                 ),
 346                 wiz.WizardStep(
 347                     # ID where the value will be stored
 348                     id="metric_reproducibility",
 349                     # Display name
 350                     name=Bcolors.HEADER+" How important the metric 'Reproducibility' is for you? 1-5: 1- Least important 5- Most Important"+Bcolors.ENDC,
 351                     # Help message
 352                     help="Enter 1-5: 1 being least important, and 5 being most important",
 353                     validators=(wiz.required_validator, wiz.int_validator(1, 5)),
 354                     default='1'
 355                 ),
 356                 wiz.WizardStep(
 357                     # ID where the value will be stored
 358                     id="metric_implementation",
 359                     # Display name
 360                     name=Bcolors.HEADER+" How important the metric 'Ease of Implementation and Maintenance' is for you? 1-5: 1- Least important 5- Most Important"+Bcolors.ENDC,
 361                     # Help message
 362                     help="Enter 1-5: 1 being least important, and 5 being most important",
 363                     validators=(wiz.required_validator, wiz.int_validator(1, 5)),
 364                     default='1'
 365                 ),
 366             )
 367         )
 368
 369     def gen_data_main_wizard(self):
 370         """
 371         Generic Wizard - Goal, metrics, data
 372         """
 373         self.wiz_generic_data_main = wiz.PromptWizard(
 374             name=Bcolors.OKBLUE+"Understanding Goal, and Preferences"+Bcolors.ENDC,
 375             description="",
 376             steps=(
 377                 # The list of input prompts to ask the user.
 378                 wiz.WizardStep(
 379                     # ID where the value will be stored
 380                     id="data_column",
 381                     # Display name
 382                     name=Bcolors.HEADER+" What does the data (columns) represent? Please type help and select the associated number"+Bcolors.ENDC,
 383                     # Help message
 384                     help="1. Well Defined Features\n 2. Signals - Timeseries, pixels, etc\n 3. Text - Unstructured\n 4. None of the above\n",
 385                     validators=(wiz.required_validator, wiz.int_validator(1, 4)),
 386                     default='1'
 387                 ),
 388             )
 389         )
 390
 391     def gen_data_signal_wizard(self):
 392         """
 393         Generic Wizard - Goal, metrics, data
 394         """
 395         self.wiz_generic_data_signal = wiz.PromptWizard(
 396             name=Bcolors.OKBLUE+"Understanding Goal, and Preferences"+Bcolors.ENDC,
 397             description="",
 398             steps=(
 399                 # The list of input prompts to ask the user.
 400                 wiz.WizardStep(
 401                     # ID where the value will be stored
 402                     id="data_signal_type",
 403                     # Display name
 404                     name=Bcolors.HEADER+" If Signals, can you choose any one from the below list? Please type help for list "+Bcolors.ENDC,
 405                     # Help message
 406                     help="1. Image\n 2. Audio\n 3. Timeseries\n 4. None of the above\n 5. Not Applicable\n  ",
 407                     validators=(wiz.required_validator, wiz.int_validator(1, 5)),
 408                     default='3'
 409                 ),
 410             )
 411         )
 412
 413     def gen_data_features_wizard(self):
 414         """
 415         Generic Wizard - Goal, metrics, data
 416         """
 417         self.wiz_generic_data_features = wiz.PromptWizard(
 418             name=Bcolors.OKBLUE+"Understanding Goal, and Preferences"+Bcolors.ENDC,
 419             description="",
 420             steps=(
 421                 # The list of input prompts to ask the user.
 422                 wiz.WizardStep(
 423                     # ID where the value will be stored
 424                     id="data_features",
 425                     # Display name
 426                     name=Bcolors.HEADER+" If features, are they well defined? i.e., are all the variables well understood? "+Bcolors.ENDC,
 427                     # Help message
 428                     help="Y/N",
 429                     validators=(wiz.required_validator, wiz.boolean_validator),
 430                     default='Y'
 431                 ),
 432                 wiz.WizardStep(
 433                     # ID where the value will be stored
 434                     id="data_features_count",
 435                     # Display name
 436                     name=Bcolors.HEADER+" If features, How many are there? "+Bcolors.ENDC,
 437                     # Help message
 438                     help="Number only - Approximate should be OK.",
 439                     validators=(wiz.required_validator, wiz.int_validator(1, 100000)),
 440                     default='10'
 441                 ),
 442             )
 443         )
 444
 445     def gen_data_text_wizard(self):
 446         """
 447         Generic Wizard - Goal, metrics, data
 448         """
 449         self.wiz_generic_data_text = wiz.PromptWizard(
 450             name=Bcolors.OKBLUE+"Understanding Goal, and Preferences"+Bcolors.ENDC,
 451             description="",
 452             steps=(
 453                 # The list of input prompts to ask the user.
 454                 wiz.WizardStep(
 455                     # ID where the value will be stored
 456                     id="data_text_type",
 457                     # Display name
 458                     name=Bcolors.HEADER+" If Text, can you choose any one from the below list? Please type help for list"+Bcolors.ENDC,
 459                     # Help message
 460                     help="1. Webpages\n 2. Emails\n 3. Social-Media Posts\n 4. Books\n 5. Formal Articles\n 6. Speech converted to text\n 7. None of the above\n 8. Not Applicable\n  ",
 461                     validators=(wiz.required_validator, wiz.int_validator(1, 8)),
 462                     default='3'
 463                 ),
 464
 465             )
 466         )
 467
 468     def gen_about_data_basic_wizard(self):
 469         """
 470         Generic Wizard - Goal, metrics, data
 471         """
 472         self.wiz_generic_data_basic = wiz.PromptWizard(
 473             name=Bcolors.OKBLUE+"Understanding Basic Input Data Information"+Bcolors.ENDC,
 474             description="",
 475             steps=(
 476                 # The list of input prompts to ask the user.
 477                 wiz.WizardStep(
 478                     # ID where the value will be stored
 479                     id="data_missing",
 480                     # Display name
 481                     name=Bcolors.HEADER+" Are there any missing values in the data? "+Bcolors.ENDC,
 482                     # Help message
 483                     help="Y/N",
 484                     validators=(wiz.required_validator, wiz.boolean_validator),
 485                     default='N'
 486                 ),
 487                 wiz.WizardStep(
 488                     # ID where the value will be stored
 489                     id="data_size_bytes",
 490                     # Display name
 491                     name=Bcolors.HEADER+" How big is the data in terms of size? (Use K/M/G Bytes unit) "+Bcolors.ENDC,
 492                     # Help message
 493                     help="Number(integer) and unit: K for Kilo, M for Mega and G for Giga. Ex: 10G for 10 Giga bytes",
 494                     validators=(wiz.required_validator),
 495                     default='1G'
 496                 ),
 497                 wiz.WizardStep(
 498                     # ID where the value will be stored
 499                     id="data_size_samples",
 500                     # Display name
 501                     name=Bcolors.HEADER+" How big is the data in terms of samples? (Use T/M/B Samples) "+Bcolors.ENDC,
 502                     # Help message
 503                     help="Number(integer) and unit: T for Thousand, M for Million and B for Billion. Ex: 1M for 1 Million Samples",
 504                     validators=(wiz.required_validator),
 505                     default='1M'
 506                 ),
 507             )
 508         )
 509
 510     def gen_about_data_advanced_wizard(self):
 511         """
 512         Generic Wizard - Goal, metrics, data
 513         """
 514         self.wiz_generic_data_adv = wiz.PromptWizard(
 515             name=Bcolors.OKBLUE+"Understanding Advanced Input Data Information"+Bcolors.ENDC,
 516             description="",
 517             steps=(
 518                 # The list of input prompts to ask the user.
 519                 wiz.WizardStep(
 520                     # ID where the value will be stored
 521                     id="data_distribution",
 522                     # Display name
 523                     name=Bcolors.HEADER+" Are you aware of any 'Distribution' that is inherent to the data, we can take advantage of?"+Bcolors.ENDC,
 524                     # Help message
 525                     help="Y/N - Yes",
 526                     validators=(wiz.required_validator, wiz.boolean_validator),
 527                     default='Y'
 528                 ),
 529                 wiz.WizardStep(
 530                     # ID where the value will be stored
 531                     id="data_io_relation",
 532                     # Display name
 533                     name=Bcolors.HEADER+" Is the probability of 'Linear Relation' between input and the output is high?"+Bcolors.ENDC,
 534                     # Help message
 535                     help="Y/N - Yes/No",
 536                     validators=(wiz.required_validator, wiz.boolean_validator),
 537                     default='Y'
 538                 ),
 539                 wiz.WizardStep(
 540                     # ID where the value will be stored
 541                     id="data_correlation",
 542                     # Display name
 543                     name=Bcolors.HEADER+" Are you confident that there is NO high correlation among the independent variables in your day?"+Bcolors.ENDC,
 544                     # Help message
 545                     help="Y/N/ - Yes/No ",
 546                     validators=(wiz.required_validator, wiz.boolean_validator),
 547                     default='Y'
 548                 ),
 549                 wiz.WizardStep(
 550                     # ID where the value will be stored
 551                     id="data_cond_indep",
 552                     # Display name
 553                     name=Bcolors.HEADER+" Are you confident that the variables are conditionally independent?"+Bcolors.ENDC,
 554                     # Help message
 555                     help="Y/N/. If probability that it rains given lightining and thunder is same as probability that it rains given lightining, then rain and thunder are conditionally independent",
 556                     validators=(wiz.required_validator, wiz.boolean_validator),
 557                     default='Y'
 558                 ),
 559             )
 560         )
 561
 562     def gen_about_output_wizard(self):
 563         """
 564         Generic Wizard - Goal, metrics, data
 565         """
 566         self.wiz_generic_data_output = wiz.PromptWizard(
 567             name=Bcolors.OKBLUE+"Understanding Data Output"+Bcolors.ENDC,
 568             description="",
 569             steps=(
 570                 # The list of input prompts to ask the user.
 571                 wiz.WizardStep(
 572                     # ID where the value will be stored
 573                     id="data_type_output",
 574                     # Display name
 575                     name=Bcolors.HEADER+" What is the expected output data type ? (Please type number associated with type in 'help') "+Bcolors.ENDC,
 576                     # Help message
 577                     help=" 1:Numerical-Discrete\n 2:Numerical-Continuous\n 3:Ordinal\n 4:Categorical-Binary\n 5:Categorical-Multiclass",
 578                     validators=(wiz.required_validator, wiz.int_validator(1, 5)),
 579                     default='1'
 580                 ),
 581                 wiz.WizardStep(
 582                     # ID where the value will be stored
 583                     id="data_output_prob",
 584                     # Display name
 585                     name=Bcolors.HEADER+" Is the expected output data a probability value ? "+Bcolors.ENDC,
 586                     # Help message
 587                     help="Y/N",
 588                     validators=(wiz.required_validator, wiz.boolean_validator),
 589                     default='N'
 590                 ),
 591             )
 592         )
 593
 594
 595     def unsupervised_wizard(self):
 596         """
 597         The Un-Supervized Learning Wizard
 598         """
 599         self.wiz_unsupervised = wiz.PromptWizard(
 600             name=Bcolors.OKBLUE+"Understanding Goal, Metrics, Data and Output Type"+Bcolors.ENDC,
 601             description="",
 602             steps=(
 603                 # The list of input prompts to ask the user.
 604                 wiz.WizardStep(
 605                     # ID where the value will be stored
 606                     id="unsup_goal",
 607                     # Display name
 608                     name=Bcolors.HEADER+" What is the main goal? (Please type number associated with type in 'help')"+Bcolors.ENDC,
 609                     # Help message
 610                     help="1: Explore Similar Groups (clustering) \n 2: Perform Dimensionality Reduction\n 3: Others\n",
 611                     validators=(wiz.required_validator, wiz.int_validator(1, 3)),
 612                     default='1'
 613                 ),
 614                 wiz.WizardStep(
 615                     # ID where the value will be stored
 616                     id="unsup_dr_topic_mod",
 617                     # Display name
 618                     name=Bcolors.HEADER+" If dimensionality reduction, do you prefer topic modelling ? (Please type NA is you are not sure)"+Bcolors.ENDC,
 619                     # Help message
 620                     help="Y/N/NA",
 621                     validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na',
 622                                                                               'y','n','na','nA'])),
 623                     default='NA'
 624                 ),
 625                 wiz.WizardStep(
 626                     # ID where the value will be stored
 627                     id="unsup_clus_dv",
 628                     # Display name
 629                     name=Bcolors.HEADER+" Are you aware of density variations in your data ? (Please type NA is you are not sure)"+Bcolors.ENDC,
 630                     # Help message
 631                     help="Y/N/NA",
 632                     validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na',
 633                                                                               'y','n','na','nA'])),
 634                     default='NA'
 635                 ),
 636                 wiz.WizardStep(
 637                     # ID where the value will be stored
 638                     id="unsup_clus_outliers",
 639                     # Display name
 640                     name=Bcolors.HEADER+" Are there too many outliers in your data ? (Please type NA is you are not sure)"+Bcolors.ENDC,
 641                     # Help message
 642                     help="Y/N/NA",
 643                     validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na',
 644                                                                               'y','n','na','nA'])),
 645                     default='NA'
 646                 ),
 647                 wiz.WizardStep(
 648                     # ID where the value will be stored
 649                     id="unsup_clus_groups",
 650                     # Display name
 651                     name=Bcolors.HEADER+" If clustering, do you know how many groups to form? (Please type NA is you are not sure)"+Bcolors.ENDC,
 652                     # Help message
 653                     help="Y/N/NA",
 654                     validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na',
 655                                                                               'y','n','na','nA'])),
 656                     default='NA'
 657                 ),
 658
 659             )
 660         )
 661
 662     def reinforcement_wizard(self):
 663         """
 664         The Reinforced Learning Wizard
 665         """
 666         message = """
 667             Reward  |--------|
 668             |-------| Agent  |  Action
 669             | |-----|        |-------|
 670             | |     |--------|       |
 671             | |state                 |
 672             | |                      |
 673             | |    |-----------|     |
 674             | |----|Environment|     |
 675             |------|           |-----|
 676                    |-----------|
 677             """
 678         self.wiz_reinforcement = wiz.PromptWizard(
 679             name=Bcolors.OKBLUE+"Reinforcement Specific"+Bcolors.ENDC,
 680             description="",
 681             steps=(
 682                 # The list of input prompts to ask the user.
 683                 wiz.WizardStep(
 684                     # ID where the value will be stored
 685                     id="ri_info",
 686                     # Display name
 687                     name=Bcolors.HEADER+" Type help for reference diagram for reinforcement-learning"+Bcolors.ENDC,
 688                     # Help message
 689                     help=message,
 690                     validators=(wiz.required_validator),
 691                     default='Type Help or Press Enter'
 692                 ),
 693                 wiz.WizardStep(
 694                     # ID where the value will be stored
 695                     id="ri_model_preference",
 696                     # Display name
 697                     name=Bcolors.HEADER+" Do you prefer model-based approach? (Type NA if you are not sure) "+Bcolors.ENDC,
 698                     # Help message
 699                     help="Y/N/NA",
 700                     validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na',
 701                                                                               'y','n','na','nA'])),
 702                     default='Y'
 703                 ),
 704                 wiz.WizardStep(
 705                     # ID where the value will be stored
 706                     id="ri_model_availability",
 707                     # Display name
 708                     name=Bcolors.HEADER+" Do you have a model for model-based approach? (Type NA if not applicable) "+Bcolors.ENDC,
 709                     # Help message
 710                     help="Y/N/NA",
 711                     validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na',
 712                                                                               'y','n','na','nA'])),
 713                     default='Y'
 714                 ),
 715                 wiz.WizardStep(
 716                     # ID where the value will be stored
 717                     id="ri_modelfree_value",
 718                     # Display name
 719                     name=Bcolors.HEADER+" In Model-Free approach, do you prefer value-based approach? (Type NA if not applicable) "+Bcolors.ENDC,
 720                     # Help message
 721                     help="Y/N/NA",
 722                     validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na',
 723                                                                               'y','n','na','nA'])),
 724                     default='Y'
 725                 ),
 726                 wiz.WizardStep(
 727                     # ID where the value will be stored
 728                     id="ri_modelfree_value_state",
 729                     # Display name
 730                     name=Bcolors.HEADER+" In Model-Free Value-Based approach, do you prefer state-only model? (Type NA if not applicable) "+Bcolors.ENDC,
 731                     # Help message
 732                     help="Y/N/NA",
 733                     validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na',
 734                                                                               'y','n','na','nA'])),
 735                     default='Y'
 736                 ),
 737                 wiz.WizardStep(
 738                     # ID where the value will be stored
 739                     id="ri_app_domain",
 740                     # Display name
 741                     name=Bcolors.HEADER+" What is the application domain ? (Please type number associated with type in 'help') "+Bcolors.ENDC,
 742                     # Help message
 743                     help=" 1:Computer Resource Mgmt.\n 2:Robotics\n 3:Traffic-Control\n 4:Reccommenders\n 5:Autonomous Vehicles\n 6:Games\n 7:Chemistry\n 8:Others\n",
 744                     validators=(wiz.required_validator, wiz.int_validator(1, 8)),
 745                     default='1'
 746                 ),
 747             )
 748         )
 749
 750     ############### All the Run Operations ######################
 751     def run_mainwiz(self):
 752         """
 753         Run the Main Wizard
 754         """
 755         self.main_wizard_l1()
 756         self.main_l1_values = self.wiz_main_l1.run(self.shell)
 757         if self.main_l1_values['data_availability']:
 758             print("OK-1")
 759             self.main_wizard_l2_b()
 760             self.main_l2b_values = self.wiz_main_l2_b.run(self.shell)
 761             if self.main_l2b_values['data_label']:
 762                 self.supervised = True
 763             else:
 764                 self.unsupervised = True
 765             if self.main_l2b_values['data_programmability']:
 766                 print(Bcolors.FAIL+"ML is not required - Please consider alternate approaches\n"+Bcolors.ENDC)
 767             else:
 768                 self.main_wizard_l3()
 769                 self.main_l3_values = self.wiz_main_l3.run(self.shell)
 770                 if self.main_l3_values['data_knowledge']:
 771                     print(Bcolors.OKGREEN+"Looks like you need ML, let's continue"+Bcolors.ENDC)
 772                     self.ml_needed = True
 773                 else:
 774                     self.main_wizard_l4()
 775                     self.main_l4_values = self.wiz_main_l4.run(self.shell)
 776                     if self.main_l4_values['data_pattern']:
 777                         print(Bcolors.OKGREEN+"Looks like you need ML, let's continue"+Bcolors.ENDC)
 778                         self.ml_needed = True
 779                     else:
 780                         print(Bcolors.FAIL+"ML is not required - Please consider alternate approaches\n"+Bcolors.ENDC)
 781         else:
 782             self.main_wizard_l2_a()
 783             self.main_l2a_values = self.wiz_main_l2_a.run(self.shell)
 784             if self.main_l2a_values['data_creativity']:
 785                 print(Bcolors.OKGREEN+"Looks like you need ML, let's continue"+Bcolors.ENDC)
 786                 self.ml_needed = True
 787                 self.reinforcement = True
 788             else:
 789                 print(Bcolors.FAIL+"ML is not required - Please consider alternate approaches\n"+Bcolors.ENDC)
 790
 791     def run_generic_wizard(self):
 792         """
 793         Run Generic Wizard
 794         """
 795         self.gen_choice_wizard()
 796         self.gen_choice_values = self.wiz_generic_choice.run(self.shell)
 797         if self.gen_choice_values['data_metrics_pref']:
 798             self.gen_metrics_wizard()
 799             self.gen_metrics_values = self.wiz_generic_metrics.run(self.shell)
 800         if self.gen_choice_values['data_main']:
 801             self.gen_data_main_wizard()
 802             self.gen_data_main_values = self.wiz_generic_data_main.run(self.shell)
 803             if int(self.gen_data_main_values['data_column']) == 3:
 804                 self.gen_data_text_wizard()
 805                 self.gen_data_text_values = self.wiz_generic_data_text.run(self.shell)
 806             else:
 807                 self.gen_data_text_values = {'data_text_type': '3'}
 808             if int(self.gen_data_main_values['data_column']) == 1:
 809                 self.gen_data_features_wizard()
 810                 self.gen_data_features_values = self.wiz_generic_data_features.run(self.shell)
 811             else:
 812                 self.gen_data_features_values = {'data_features': 'Y',
 813                                                  'data_features_count': '10'}
 814             if int(self.gen_data_main_values['data_column']) == 2:
 815                 self.gen_data_signal_wizard()
 816                 self.gen_data_signal_values = self.wiz_generic_data_signal.run(self.shell)
 817             else:
 818                 self.gen_data_signal_values = {'data_signal_type': '1'}
 819         else:
 820             self.gen_data_main_values = {'data_column': '1'}
 821             print("Unknown Data Type")
 822         if self.gen_choice_values['data_databasic_pref']:
 823             self.gen_about_data_basic_wizard()
 824             self.gen_about_data_basic_values = self.wiz_generic_data_basic.run(self.shell)
 825         else:
 826             self.gen_about_data_basic_values = {'data_missing':'N',
 827                                                 'data_size_bytes': '1G',
 828                                                 'data_size_samples': '1M'}
 829         if self.gen_choice_values['data_dataadv_pref']:
 830             self.gen_about_data_advanced_wizard()
 831             self.gen_about_data_adv_values = self.wiz_generic_data_adv.run(self.shell)
 832         else:
 833             self.gen_about_data_adv_values = {'data_distribution': 'N',
 834                                               'data_io_relation': 'N',
 835                                               'data_correlation': 'N',
 836                                               'data_cond_indep': 'N'}
 837         if self.gen_choice_values['data_dataoutput_pref']:
 838             self.gen_about_output_wizard()
 839             self.gen_about_data_output_values = self.wiz_generic_data_output.run(self.shell)
 840         else:
 841             self.gen_about_data_output_values = {'data_type_output': '1',
 842                                                  'data_output_prob': 'N'}
 843
 844
 845     def run_unsupervised_wizard(self):
 846         """
 847         Run UnSupervised Learning Wizard.
 848         """
 849         self.unsupervised_wizard()
 850         self.unsup_values = self.wiz_unsupervised.run(self.shell)
 851
 852     def run_reinforcement_wizard(self):
 853         """
 854         Run Reinforced Learning Wizard
 855         """
 856         self.reinforcement_wizard()
 857         self.ri_values = self.wiz_reinforcement.run(self.shell)
 858
 859     def decide_unsupervised(self):
 860         """
 861         Decide which Unsupervised-learning to use
 862         """
 863         repro = False
 864         clus_prob = False
 865         if int(self.unsup_values['unsup_goal']) == 1:
 866             # Clustering
 867             if 'high' in self.data_size:
 868                 if not self.reproducibility:
 869                     clus_prob = True
 870                 else:
 871                     repro = True
 872             else:
 873                 if 'y' in self.unsup_values['unsup_clus_dv'].lower():
 874                     if 'y' in self.unsup_values['unsup_clus_groups'].lower():
 875                         clus_prob = True
 876                     else:
 877                         print("Unsupervised Learning model to consider: Hierarchical Clustering")
 878                         return
 879                 else:
 880                     repro = True
 881             if repro:
 882                 if 'y' in self.unsup_values['unsup_clus_outliers'].lower():
 883                     print("Unsupervised Learning model to consider: Hierarchical Clustering")
 884                 else:
 885                     print("Unsupervised Learning model to consider: DBSCAN")
 886                 return
 887             if clus_prob:
 888                 if 'y' in self.gen_about_data_output_values['data_output_prob'].lower():
 889                     print("Unsupervised Learning model to consider: Gaussian Mixture")
 890                 else:
 891                     print("Unsupervised Learning model to consider: KMeans")
 892                 return
 893         elif int(self.unsup_values['unsup_goal']) == 2:
 894             # Dimensionality Reduction
 895             if 'y' in self.unsup_values['unsup_dr_topic_mod'].lower():
 896                 if 'y' in self.gen_about_data_output_values['data_output_prob'].lower():
 897                     print("Unsupervised Learning model to consider: SVD")
 898                 else:
 899                     print("Unsupervised Learning model to consider: LDA")
 900             else:
 901                 print("Unsupervised Learning model to consider: PCA")
 902         else:
 903             print("Sorry. We need to discuss, please connect with Anuket Thoth Project <sridhar.rao@spirent.com>")
 904
 905     def decide_reinforcement(self):
 906         """
 907         Decide which reinforement learning to use.
 908         """
 909         if (int(self.gen_about_data_output_values['data_type_output']) == 2 or
 910                 'y' in self.ri_values['ri_model_preference'].lower()):
 911             # Model Bsaed
 912             if 'y' in self.ri_values['ri_model_availability'].lower():
 913                 print("Reinforcement Learning model to consider - AlphaZero")
 914             else:
 915                 print("Reinforcement Learning models to consider - World Models, I2A, MBMF, and MBVE")
 916         elif 'n' in self.ri_values['ri_model_preference'].lower():
 917             # Model-Free based approach.
 918             if 'y' not in self.ri_values['ri_modelfree_value'].lower():
 919                 print("Reinforcement Learning models to consider: Policy Gradient and Actor Critic")
 920             else:
 921                 if 'y' in self.ri_values['ri_modelfree_value_state'].lower():
 922                     print("Reinforcement Learning models to consider - Monte Carlo, TD(0), and TD(Lambda)")
 923                 else:
 924                     print("Reinforcement Learning models to consider - SARSA, QLearning, Deep Queue Nets")
 925         else:
 926             # Default
 927             print("Sorry. We need to discuss, please connect with Anuket Thoth Project <srao@linuxfoundation.org>")
 928
 929     def perform_inference(self):
 930         """
 931         Perform Inferences. Used across all 3 types.
 932         """
 933         # Decide whether data is Low or High
 934         self.data_size = 'unknown'
 935         if ('k' in self.gen_about_data_basic_values['data_size_bytes'].lower() or
 936                 't' in self.gen_about_data_basic_values['data_size_samples']):
 937             self.data_size = 'low'
 938
 939         if int(self.gen_metrics_values['metric_interpretability']) >= 3 :
 940             self.interpretability = True
 941         if int(self.gen_metrics_values['metric_speed']) >= 3 :
 942             self.faster = True
 943         if int(self.gen_metrics_values['metric_reproducibility']) >= 3 :
 944             self.reproducibility = True
 945
 946         # Decide Features relative to Data (ftod_ratio) - high/low
 947         if ('k' in self.gen_about_data_basic_values['data_size_bytes'].lower() or
 948                 't' in self.gen_about_data_basic_values['data_size_samples']):
 949             if int(self.gen_data_features_values['data_features_count']) > 50:
 950                 self.ftod_ratio = 'high'
 951         elif ('m' in self.gen_about_data_basic_values['data_size_bytes'].lower() or
 952                 'm' in self.gen_about_data_basic_values['data_size_samples']):
 953             if int(self.gen_data_features_values['data_features_count']) > 5000:
 954                 self.ftod_ratio = 'high'
 955         else:
 956             if int(self.gen_data_features_values['data_features_count']) > 500000:
 957                 self.ftod_ratio = 'high'
 958
 959
 960     def decide_supervised(self):
 961         """
 962         Decide which Supervised learning to use.
 963         """
 964         if 'high' in self.data_size:
 965             # Cover: DT, RF, RNN, CNN, ANN and Naive Bayes
 966             if self.interpretability:
 967                 if self.faster:
 968                     print("Supervised Learning model to consider  - Decision Tree")
 969                 else:
 970                     print("Supervised Learning model to consider  - Random Forest")
 971             else:
 972                 if int(self.gen_data_main_values['data_column']) == 3:
 973                     print("Supervised Learning model to consider  - RNN")
 974                 elif (int(self.gen_data_main_values['data_column']) == 2 and
 975                         int(self.gen_data_signal_values['data_signal_type']) == 1):
 976                     print("Supervised Learning model to consider  - CNN")
 977                 elif (int(self.gen_data_main_values['data_column']) == 2 and
 978                         (int(self.gen_data_signal_values['data_signal_type']) == 2 or
 979                             int(self.gen_data_signal_values['data_signal_type']) == 3)):
 980                     if 'y' in self.gen_about_data_output_values['data_output_prob'].lower():
 981                         print("Supervised Learning model to consider  - Naive Bayes")
 982                     else:
 983                         print("Supervised Learning model to consider  - ANN")
 984                 else:
 985                     print("Supervised model to consider  Learning - ANN")
 986         elif 'low' in self.data_size:
 987             from_b = False
 988             # Cover: Regressions
 989             if 'high' in self.ftod_ratio:
 990                 from_b = True
 991             else:
 992                 print("Supervised Learning model to consider  - SVN with Gaussian Kernel")
 993                 return
 994             if int(self.gen_about_data_output_values['data_type_output']) != 2:
 995                 from_b = True
 996             else:
 997                 if 'y' in self.gen_about_data_adv_values['data_io_relation'].lower():
 998                     print("Supervised Learning model to consider  - Linear Regression or Linear SVM")
 999                 else:
1000                     print("Supervised Learning model to consider  - Polynomial Regression or nonLinear SVM")
1001                 return
1002             if from_b:
1003                 if int(self.gen_about_data_output_values['data_output_type']) == 4:
1004                     if 'y' in self.gen_about_data_output_values['data_output_prob'].lower():
1005                         if 'y' in self.gen_about_data_adv_values['data_cond_indep'].lower():
1006                             print("Supervised Learning model to consider  - Naive Bayes")
1007                         else:
1008                             if 'y' in self.gen_about_data_adv_values['data_correlation'].lower():
1009                                 print("Supervised Learning model to consider  - LASSO or Ridge Regression")
1010                             else:
1011                                 print("Supervised Learning model to consider  - Logistic Regression")
1012                     else:
1013                         print("Supervised Learning model to consider  - Polynomial Regression or nonLinear SVM")
1014
1015                 else:
1016                     print("Supervised Learning model to consider - KNN")
1017         else:
1018             # Default
1019             print("Sorry. We need to discuss, please connect with Anuket Thoth Project <sridhar.rao@spirent.com>")
1020
1021     def ask_and_decide(self):
1022         """
1023         THe Main Engine
1024         """
1025         self.run_mainwiz()
1026         if self.ml_needed:
1027             self.run_generic_wizard()
1028             if self.supervised:
1029                 self.decide_supervised()
1030             elif self.unsupervised:
1031                 self.run_unsupervised_wizard()
1032                 self.decide_unsupervised()
1033             elif self.reinforcement:
1034                 self.run_reinforcement_wizard()
1035                 self.decide_reinforcement()
1036
1037
def signal_handler(signum, frame):
    """
    Handle a delivered signal: tell the user that no suggestion will be
    given, echo the handler arguments and leave with exit status 0.
    """
    notice = "\n You interrupted, No Suggestion will be provided!"
    print(notice)
    print(signum, frame)
    # Same effect as sys.exit(0), which raises SystemExit(0).
    raise SystemExit(0)
1045
def main():
    """
    Entry point: create the wizard, run the question/decision flow and
    print the closing note.

    KeyboardInterrupt and MemoryError are reported with a short message
    instead of a traceback; the closing note is printed afterwards as well.
    """
    try:
        AlgoSelectorWizard().ask_and_decide()
    except (KeyboardInterrupt, MemoryError):
        print("Some Error Occured - No Suggestion can be provided")

    farewell = ("Thanks for using the Algoselector-Wizard, "
                "Hope our suggestion will be useful")
    print(farewell)
1058
# Script entry: route SIGINT to signal_handler before the wizard starts
# prompting, then run the tool.
if __name__ == "__main__":
    signal.signal(signal.SIGINT, signal_handler)
    main()