tools/modelselector/modelselector.py

   1 # Copyright 2022 Linux Foundation.
   2 # srao@linuxfoundation.org
   3 #
   4 # Licensed under the Apache License, Version 2.0 (the "License");
   5 # you may not use this file except in compliance with the License.
   6 # You may obtain a copy of the License at
   7 #
   8 #   http://www.apache.org/licenses/LICENSE-2.0
   9 #
  10 # Unless required by applicable law or agreed to in writing, software
  11 # distributed under the License is distributed on an "AS IS" BASIS,
  12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13 # See the License for the specific language governing permissions and
  14 # limitations under the License.
  15
  16 """
  17 Tool to suggest which ML approach is more applicable for
  18 a particular data and usecase.
  19 TODO:
  20 1. Minimize code.
  21 2. Add Informative data to the user.
  22 3. Check for Size Entry - 1G/K ..
  23 """
  24
  25 from __future__ import print_function
  26 import signal
  27 import sys
  28 from pypsi import wizard as wiz
  29 from pypsi.shell import Shell
  30
  31 # pylint: disable=line-too-long,too-few-public-methods,too-many-instance-attributes, too-many-nested-blocks, too-many-return-statements, too-many-branches
  32
  33 class Bcolors:
  34     """
  35     For Coloring
  36     """
  37     HEADER = '\033[95m'
  38     OKBLUE = '\033[94m'
  39     OKGREEN = '\033[92m'
  40     WARNING = '\033[93m'
  41     FAIL = '\033[91m'
  42     ENDC = '\033[0m'
  43     BOLD = '\033[1m'
  44     UNDERLINE = '\033[4m'
  45
  46 class AlgoSelectorWizard():
  47     """
  48     Class to create wizards
  49     """
  50     def __init__(self):
  51         """
  52         Perform Initialization.
  53         """
  54         self.shell = Shell()
  55         # Set of all values from the user
  56         self.main_values = {}
  57         self.main_l1_values = {}
  58         self.main_l2a_values = {}
  59         self.main_l2b_values = {}
  60         self.main_l3_values = {}
  61         self.main_l4_values = {}
  62         self.unsup_values = {}
  63         self.ri_values = {}
  64         self.gen_values = {}
  65         self.gen_choice_values = {}
  66         self.gen_metrics_values = {}
  67         self.gen_data_main_values = {}
  68         self.gen_data_text_values = {}
  69         self.gen_data_features_values = {}
  70         self.gen_data_signal_values = {}
  71         self.gen_about_data_basic_values = {}
  72         self.gen_about_data_adv_values = {}
  73         self.gen_about_data_output_values = {}
  74         self.gans_values = {}
  75         # Set of Wizards.
  76         self.wiz_main = None
  77         self.wiz_main_l1 = None
  78         self.wiz_main_l2_a = None
  79         self.wiz_main_l2_b = None
  80         self.wiz_main_l3 = None
  81         self.wiz_main_l4 = None
  82         self.wiz_generic = None
  83         self.wiz_generic_choice = None
  84         self.wiz_geneirc_metric = None
  85         self.wiz_generic_data_main = None
  86         self.wiz_generic_data_signal = None
  87         self.wiz_generic_data_features = None
  88         self.wiz_generic_data_text = None
  89         self.wiz_generic_data_basic = None
  90         self.wiz_generic_data_adv = None
  91         self.wiz_generic_data_output = None
  92         self.wiz_unsupervised = None
  93         self.wiz_reinforcement = None
  94         self.wiz_gans = None
  95         # Some Inferences
  96         self.ml_needed = False
  97         self.ml_gans = False
  98         self.supervised = False
  99         self.unsupervised = False
 100         self.reinforcement = False
 101         self.data_size = 'high'
 102         self.interpretability = False
 103         self.faster = False
 104         self.ftod_ratio = 'low'
 105         self.reproducibility = False
 106
 107
 108     ############# All the Wizards ##################################
 109
 110     ### GENERIC Wizards - Need for ML ##############################
 111     def main_wizard_l1(self):
 112         """
 113         The Main Wizard L1
 114         """
 115         self.wiz_main_l1 = wiz.PromptWizard(
 116             name=Bcolors.OKBLUE+"Do you Need ML - Data Availability"+Bcolors.ENDC,
 117             description="",
 118             steps=(
 119                 # The list of input prompts to ask the user.
 120                 wiz.WizardStep(
 121                     # ID where the value will be stored
 122                     id="data_availability",
 123                     # Display name
 124                     name=Bcolors.HEADER+"Do you have access to data about different situations, or that describes a lot of examples of situations"+Bcolors.ENDC,
 125                     # Help message
 126                     help="Y/N - Yes/No",
 127                     validators=(wiz.required_validator, wiz.boolean_validator),
 128                     default='Y',
 129                 ),
 130             )
 131         )
 132
 133     def gans_wizard(self):
 134         """
 135         The GANs Wizard
 136         """
 137         self.wiz_gans = wiz.PromptWizard(
 138             name=Bcolors.OKBLUE+"Synthetic Data Genration using GANs"+Bcolors.ENDC,
 139             description="",
 140             steps=(
 141                 # The list of input prompts to ask the user.
 142                 wiz.WizardStep(
 143                     # ID where the value will be stored
 144                     id="gans_data_type",
 145                     # Display name
 146                     name=Bcolors.HEADER+"Is the sample data you have is time-series? Answer Y/N - Yes/No"+Bcolors.ENDC,
 147                     # Help message
 148                     help="Y/N - Yes/No",
 149                     validators=(wiz.required_validator, wiz.boolean_validator),
 150                     default='Y',
 151                 ),
 152                 wiz.WizardStep(
 153                     # ID where the value will be stored
 154                     id="gans_data_variables",
 155                     # Display name
 156                     name=Bcolors.HEADER+"Is the sample data you have is multi-variate (more than one features/columns) ? Answer Y/N - Yes/No"+Bcolors.ENDC,
 157                     # Help message
 158                     help="Y/N - Yes/No",
 159                     validators=(wiz.required_validator, wiz.boolean_validator),
 160                     default='Y',
 161                 ),
 162             )
 163         )
 164
 165
 166     def main_wizard_l2_a(self):
 167         """
 168         The Main Wizard L2-A
 169         """
 170         self.wiz_main_l2_a = wiz.PromptWizard(
 171             name=Bcolors.OKBLUE+"Do you Need ML - Data Creation"+Bcolors.ENDC,
 172             description="",
 173             steps=(
 174                 # The list of input prompts to ask the user.
 175                 wiz.WizardStep(
 176                     # ID where the value will be stored
 177                     id="data_creativity",
 178                     # Display name
 179                     name=Bcolors.HEADER+"Will a system be able to gather a lot of data by trying sequences of actions in many different situations and seeing the results"+Bcolors.ENDC,
 180                     # Help message
 181                     help="Y/N - Yes/No",
 182                     validators=(wiz.required_validator, wiz.boolean_validator),
 183                     default='Y',
 184                 ),
 185             )
 186         )
 187
 188     def main_wizard_l2_b(self):
 189         """
 190         The Main Wizard L2-B
 191         """
 192         gan = """ Synthetic data generation is an important use-case for Telco-scenarios, due to difficulty in getting good dataset."""
 193         label = """ One or more meaningful and informative 'tag' to provide context so that a machine learning model can learn from it. For example, labels might indicate whether a photo contains a bird or car, which words were uttered in an audio recording, or if an x-ray contains a tumor. Data labeling is required for a variety of use cases including computer vision, natural language processing, and speech recognition."""
 194         self.wiz_main_l2_b = wiz.PromptWizard(
 195             name=Bcolors.OKBLUE+"Do you Need ML - Data Programmability"+Bcolors.ENDC,
 196             description="",
 197             steps=(
 198                 # The list of input prompts to ask the user.
 199                 wiz.WizardStep(
 200                     # ID where the value will be stored
 201                     id="data_generation",
 202                     # Display name
 203                     name=Bcolors.HEADER+" Do you want to generate Synthetic Data from the existing data (Type Y/N - Yes/No). Type helfp for the description"+Bcolors.ENDC,
 204                     # Help message
 205                     help=gan,
 206                     validators=(wiz.required_validator, wiz.boolean_validator),
 207                     default='N',
 208                 ),
 209                 wiz.WizardStep(
 210                     # ID where the value will be stored
 211                     id="data_label",
 212                     # Display name
 213                     name=Bcolors.HEADER+" Do you have Labelled data? (Type Y/N - Yes/No). Type help for description of label. "+Bcolors.ENDC,
 214                     # Help message
 215                     help=label,
 216                     validators=(wiz.required_validator, wiz.boolean_validator),
 217                     default='Y',
 218                 ),
 219                 wiz.WizardStep(
 220                     # ID where the value will be stored
 221                     id="data_programmability",
 222                     # Display name
 223                     name=Bcolors.HEADER+"Can a program or set of rules decide what actions to take based on the data you have about the situations"+Bcolors.ENDC,
 224                     # Help message
 225                     help="Y/N - Yes/No",
 226                     validators=(wiz.required_validator, wiz.boolean_validator),
 227                     default='N',
 228                 ),
 229             )
 230         )
 231
 232
 233     def main_wizard_l3(self):
 234         """
 235         The Main Wizard L3
 236         """
 237         self.wiz_main_l3 = wiz.PromptWizard(
 238             name=Bcolors.OKBLUE+"Do you Need ML - Data Knowledge"+Bcolors.ENDC,
 239             description="",
 240             steps=(
 241                 # The list of input prompts to ask the user.
 242                 wiz.WizardStep(
 243                     # ID where the value will be stored
 244                     id="data_knowledge",
 245                     # Display name
 246                     name=Bcolors.HEADER+"Could a knowledgeable human decide what actions to take based on the data you have about the situations"+Bcolors.ENDC,
 247                     # Help message
 248                     help="Y/N - Yes/No",
 249                     validators=(wiz.required_validator, wiz.boolean_validator),
 250                     default='Y',
 251                 ),
 252             )
 253         )
 254
 255     def main_wizard_l4(self):
 256         """
 257         The Main Wizard - L4
 258         """
 259         self.wiz_main_l4 = wiz.PromptWizard(
 260             name=Bcolors.OKBLUE+"Do you Need ML - Data Pattern"+Bcolors.ENDC,
 261             description="",
 262             steps=(
 263                 # The list of input prompts to ask the user.
 264                 wiz.WizardStep(
 265                     # ID where the value will be stored
 266                     id="data_pattern",
 267                     # Display name
 268                     name=Bcolors.HEADER+"Could there be patterns in these situations that the humans haven't recognized before"+Bcolors.ENDC,
 269                     # Help message
 270                     help="Y/N - Yes/No.",
 271                     validators=(wiz.required_validator, wiz.boolean_validator),
 272                     default='Y'
 273                 ),
 274             )
 275         )
 276     ### GENERIC Wizards - GOAL, METRICS, DATA ##############################
 277     def gen_choice_wizard(self):
 278         """
 279         Generic Wizard - Goal, metrics, data
 280         """
 281         self.wiz_generic_choice = wiz.PromptWizard(
 282             name=Bcolors.OKBLUE+"Understanding Goal, and Preferences"+Bcolors.ENDC,
 283             description="",
 284             steps=(
 285                 # The list of input prompts to ask the user.
 286                 wiz.WizardStep(
 287                     # ID where the value will be stored
 288                     id="data_goal",
 289                     # Display name
 290                     name=Bcolors.HEADER+" What is your goal with the data? Predict, Describe or Explore"+Bcolors.ENDC,
 291                     # Help message
 292                     help="Enter one of Predict/Describe/Explore",
 293                     validators=(wiz.required_validator, wiz.choice_validator(['Predict',
 294                                                                               'predict',
 295                                                                               'Describe',
 296                                                                               'describe',
 297                                                                               'Explore',
 298                                                                               'explore'])),
 299                     default='Explore'
 300                 ),
 301                 wiz.WizardStep(
 302                     # ID where the value will be stored
 303                     id="data_metrics_pref",
 304                     # Display name
 305                     name=Bcolors.HEADER+" Do you know which metrics (speed, accuracy, etc.) are more important for you? "+Bcolors.ENDC,
 306                     # Help message
 307                     help="Y/N - Yes/No",
 308                     validators=(wiz.required_validator, wiz.boolean_validator),
 309                     default='Y'
 310                 ),
 311                 wiz.WizardStep(
 312                     # ID where the value will be stored
 313                     id="data_main",
 314                     # Display name
 315                     name=Bcolors.HEADER+" Do you know about the input data type (If its signal/features/text)  ?  "+Bcolors.ENDC,
 316                     # Help message
 317                     help="Y/N - Yes/No",
 318                     validators=(wiz.required_validator, wiz.boolean_validator),
 319                     default='Y'
 320                 ),
 321                 wiz.WizardStep(
 322                     # ID where the value will be stored
 323                     id="data_databasic_pref",
 324                     # Display name
 325                     name=Bcolors.HEADER+" Do you have basic information (size, count, etc.) about the input data? "+Bcolors.ENDC,
 326                     # Help message
 327                     help="Y/N - Yes/No",
 328                     validators=(wiz.required_validator, wiz.boolean_validator),
 329                     default='Y'
 330                 ),
 331                 wiz.WizardStep(
 332                     # ID where the value will be stored
 333                     id="data_dataadv_pref",
 334                     # Display name
 335                     name=Bcolors.HEADER+" Do you have advanced information (distribution, relation, independency, etc.) about the input data? "+Bcolors.ENDC,
 336                     # Help message
 337                     help="Y/N - Yes/No",
 338                     validators=(wiz.required_validator, wiz.boolean_validator),
 339                     default='Y'
 340                 ),
 341                 wiz.WizardStep(
 342                     # ID where the value will be stored
 343                     id="data_dataoutput_pref",
 344                     # Display name
 345                     name=Bcolors.HEADER+" Do you have basic information (size, count, etc.) about the output? "+Bcolors.ENDC,
 346                     # Help message
 347                     help="Y/N - Yes/No",
 348                     validators=(wiz.required_validator, wiz.boolean_validator),
 349                     default='Y'
 350                 ),
 351             )
 352         )
 353
 354     def gen_metrics_wizard(self):
 355         """
 356         Generic Wizard - Goal, metrics, data
 357         """
 358         self.wiz_generic_metrics = wiz.PromptWizard(
 359             name=Bcolors.OKBLUE+"Understanding Goal, Metrics, Data and Output Type"+Bcolors.ENDC,
 360             description="",
 361             steps=(
 362                 # The list of input prompts to ask the user.
 363                 wiz.WizardStep(
 364                     # ID where the value will be stored
 365                     id="metric_accuracy",
 366                     # Display name
 367                     name=Bcolors.HEADER+" How important the metric 'Accuracy' is for you? 1-5: 1- Least important 5- Most Important"+Bcolors.ENDC,
 368                     # Help message
 369                     help="Enter 1-5: 1 being least important, and 5 being most important",
 370                     validators=(wiz.required_validator, wiz.int_validator(1, 5)),
 371                     default='1'
 372                 ),
 373                 wiz.WizardStep(
 374                     # ID where the value will be stored
 375                     id="metric_speed",
 376                     # Display name
 377                     name=Bcolors.HEADER+" How important the metric 'Speed' is for you? 1-5: 1- Least important 5- Most Important"+Bcolors.ENDC,
 378                     # Help message
 379                     help="Enter 1-5: 1 being least important, and 5 being most important",
 380                     validators=(wiz.required_validator, wiz.int_validator(1, 5)),
 381                     default='1'
 382                 ),
 383                 wiz.WizardStep(
 384                     # ID where the value will be stored
 385                     id="metric_interpretability",
 386                     # Display name
 387                     name=Bcolors.HEADER+" How important the metric 'Interpretability' is for you? 1-5: 1- Least important 5- Most Important"+Bcolors.ENDC,
 388                     # Help message
 389                     help="Enter 1-5: 1 being least important, and 5 being most important",
 390                     validators=(wiz.required_validator, wiz.int_validator(1, 5)),
 391                     default='1'
 392                 ),
 393                 wiz.WizardStep(
 394                     # ID where the value will be stored
 395                     id="metric_reproducibility",
 396                     # Display name
 397                     name=Bcolors.HEADER+" How important the metric 'Reproducibility' is for you? 1-5: 1- Least important 5- Most Important"+Bcolors.ENDC,
 398                     # Help message
 399                     help="Enter 1-5: 1 being least important, and 5 being most important",
 400                     validators=(wiz.required_validator, wiz.int_validator(1, 5)),
 401                     default='1'
 402                 ),
 403                 wiz.WizardStep(
 404                     # ID where the value will be stored
 405                     id="metric_implementation",
 406                     # Display name
 407                     name=Bcolors.HEADER+" How important the metric 'Ease of Implementation and Maintenance' is for you? 1-5: 1- Least important 5- Most Important"+Bcolors.ENDC,
 408                     # Help message
 409                     help="Enter 1-5: 1 being least important, and 5 being most important",
 410                     validators=(wiz.required_validator, wiz.int_validator(1, 5)),
 411                     default='1'
 412                 ),
 413             )
 414         )
 415
 416     def gen_data_main_wizard(self):
 417         """
 418         Generic Wizard - Goal, metrics, data
 419         """
 420         self.wiz_generic_data_main = wiz.PromptWizard(
 421             name=Bcolors.OKBLUE+"Understanding Goal, and Preferences"+Bcolors.ENDC,
 422             description="",
 423             steps=(
 424                 # The list of input prompts to ask the user.
 425                 wiz.WizardStep(
 426                     # ID where the value will be stored
 427                     id="data_column",
 428                     # Display name
 429                     name=Bcolors.HEADER+" What does the data (columns) represent? Please type help and select the associated number"+Bcolors.ENDC,
 430                     # Help message
 431                     help="1. Well Defined Features\n 2. Signals - Timeseries, pixels, etc\n 3. Text - Unstructured\n 4. None of the above\n",
 432                     validators=(wiz.required_validator, wiz.int_validator(1, 4)),
 433                     default='1'
 434                 ),
 435             )
 436         )
 437
 438     def gen_data_signal_wizard(self):
 439         """
 440         Generic Wizard - Goal, metrics, data
 441         """
 442         self.wiz_generic_data_signal = wiz.PromptWizard(
 443             name=Bcolors.OKBLUE+"Understanding Goal, and Preferences"+Bcolors.ENDC,
 444             description="",
 445             steps=(
 446                 # The list of input prompts to ask the user.
 447                 wiz.WizardStep(
 448                     # ID where the value will be stored
 449                     id="data_signal_type",
 450                     # Display name
 451                     name=Bcolors.HEADER+" If Signals, can you choose any one from the below list? Please type help for list "+Bcolors.ENDC,
 452                     # Help message
 453                     help="1. Image\n 2. Audio\n 3. Timeseries\n 4. None of the above\n 5. Not Applicable\n  ",
 454                     validators=(wiz.required_validator, wiz.int_validator(1, 5)),
 455                     default='3'
 456                 ),
 457             )
 458         )
 459
 460     def gen_data_features_wizard(self):
 461         """
 462         Generic Wizard - Goal, metrics, data
 463         """
 464         self.wiz_generic_data_features = wiz.PromptWizard(
 465             name=Bcolors.OKBLUE+"Understanding Goal, and Preferences"+Bcolors.ENDC,
 466             description="",
 467             steps=(
 468                 # The list of input prompts to ask the user.
 469                 wiz.WizardStep(
 470                     # ID where the value will be stored
 471                     id="data_features",
 472                     # Display name
 473                     name=Bcolors.HEADER+" If features, are they well defined? i.e., are all the variables well understood? "+Bcolors.ENDC,
 474                     # Help message
 475                     help="Y/N",
 476                     validators=(wiz.required_validator, wiz.boolean_validator),
 477                     default='Y'
 478                 ),
 479                 wiz.WizardStep(
 480                     # ID where the value will be stored
 481                     id="data_features_count",
 482                     # Display name
 483                     name=Bcolors.HEADER+" If features, How many are there? "+Bcolors.ENDC,
 484                     # Help message
 485                     help="Number only - Approximate should be OK.",
 486                     validators=(wiz.required_validator, wiz.int_validator(1, 100000)),
 487                     default='10'
 488                 ),
 489             )
 490         )
 491
 492     def gen_data_text_wizard(self):
 493         """
 494         Generic Wizard - Goal, metrics, data
 495         """
 496         self.wiz_generic_data_text = wiz.PromptWizard(
 497             name=Bcolors.OKBLUE+"Understanding Goal, and Preferences"+Bcolors.ENDC,
 498             description="",
 499             steps=(
 500                 # The list of input prompts to ask the user.
 501                 wiz.WizardStep(
 502                     # ID where the value will be stored
 503                     id="data_text_type",
 504                     # Display name
 505                     name=Bcolors.HEADER+" If Text, can you choose any one from the below list? Please type help for list"+Bcolors.ENDC,
 506                     # Help message
 507                     help="1. Webpages\n 2. Emails\n 3. Social-Media Posts\n 4. Books\n 5. Formal Articles\n 6. Speech converted to text\n 7. None of the above\n 8. Not Applicable\n  ",
 508                     validators=(wiz.required_validator, wiz.int_validator(1, 8)),
 509                     default='3'
 510                 ),
 511
 512             )
 513         )
 514
 515     def gen_about_data_basic_wizard(self):
 516         """
 517         Generic Wizard - Goal, metrics, data
 518         """
 519         self.wiz_generic_data_basic = wiz.PromptWizard(
 520             name=Bcolors.OKBLUE+"Understanding Basic Input Data Information"+Bcolors.ENDC,
 521             description="",
 522             steps=(
 523                 # The list of input prompts to ask the user.
 524                 wiz.WizardStep(
 525                     # ID where the value will be stored
 526                     id="data_missing",
 527                     # Display name
 528                     name=Bcolors.HEADER+" Are there any missing values in the data? "+Bcolors.ENDC,
 529                     # Help message
 530                     help="Y/N",
 531                     validators=(wiz.required_validator, wiz.boolean_validator),
 532                     default='N'
 533                 ),
 534                 wiz.WizardStep(
 535                     # ID where the value will be stored
 536                     id="data_size_bytes",
 537                     # Display name
 538                     name=Bcolors.HEADER+" How big is the data in terms of size? (Use K/M/G Bytes unit) "+Bcolors.ENDC,
 539                     # Help message
 540                     help="Number(integer) and unit: K for Kilo, M for Mega and G for Giga. Ex: 10G for 10 Giga bytes",
 541                     validators=(wiz.required_validator),
 542                     default='1G'
 543                 ),
 544                 wiz.WizardStep(
 545                     # ID where the value will be stored
 546                     id="data_size_samples",
 547                     # Display name
 548                     name=Bcolors.HEADER+" How big is the data in terms of samples? (Use T/M/B Samples) "+Bcolors.ENDC,
 549                     # Help message
 550                     help="Number(integer) and unit: T for Thousand, M for Million and B for Billion. Ex: 1M for 1 Million Samples",
 551                     validators=(wiz.required_validator),
 552                     default='1M'
 553                 ),
 554             )
 555         )
 556
 557     def gen_about_data_advanced_wizard(self):
 558         """
 559         Generic Wizard - Goal, metrics, data
 560         """
 561         self.wiz_generic_data_adv = wiz.PromptWizard(
 562             name=Bcolors.OKBLUE+"Understanding Advanced Input Data Information"+Bcolors.ENDC,
 563             description="",
 564             steps=(
 565                 # The list of input prompts to ask the user.
 566                 wiz.WizardStep(
 567                     # ID where the value will be stored
 568                     id="data_distribution",
 569                     # Display name
 570                     name=Bcolors.HEADER+" Are you aware of any 'Distribution' that is inherent to the data, we can take advantage of?"+Bcolors.ENDC,
 571                     # Help message
 572                     help="Y/N - Yes",
 573                     validators=(wiz.required_validator, wiz.boolean_validator),
 574                     default='Y'
 575                 ),
 576                 wiz.WizardStep(
 577                     # ID where the value will be stored
 578                     id="data_io_relation",
 579                     # Display name
 580                     name=Bcolors.HEADER+" Is the probability of 'Linear Relation' between input and the output is high?"+Bcolors.ENDC,
 581                     # Help message
 582                     help="Y/N - Yes/No",
 583                     validators=(wiz.required_validator, wiz.boolean_validator),
 584                     default='Y'
 585                 ),
 586                 wiz.WizardStep(
 587                     # ID where the value will be stored
 588                     id="data_correlation",
 589                     # Display name
 590                     name=Bcolors.HEADER+" Are you confident that there is NO high correlation among the independent variables in your day?"+Bcolors.ENDC,
 591                     # Help message
 592                     help="Y/N/ - Yes/No ",
 593                     validators=(wiz.required_validator, wiz.boolean_validator),
 594                     default='Y'
 595                 ),
 596                 wiz.WizardStep(
 597                     # ID where the value will be stored
 598                     id="data_cond_indep",
 599                     # Display name
 600                     name=Bcolors.HEADER+" Are you confident that the variables are conditionally independent?"+Bcolors.ENDC,
 601                     # Help message
 602                     help="Y/N/. If probability that it rains given lightining and thunder is same as probability that it rains given lightining, then rain and thunder are conditionally independent",
 603                     validators=(wiz.required_validator, wiz.boolean_validator),
 604                     default='Y'
 605                 ),
 606             )
 607         )
 608
 609     def gen_about_output_wizard(self):
 610         """
 611         Generic Wizard - Goal, metrics, data
 612         """
 613         self.wiz_generic_data_output = wiz.PromptWizard(
 614             name=Bcolors.OKBLUE+"Understanding Data Output"+Bcolors.ENDC,
 615             description="",
 616             steps=(
 617                 # The list of input prompts to ask the user.
 618                 wiz.WizardStep(
 619                     # ID where the value will be stored
 620                     id="data_type_output",
 621                     # Display name
 622                     name=Bcolors.HEADER+" What is the expected output data type ? (Please type number associated with type in 'help') "+Bcolors.ENDC,
 623                     # Help message
 624                     help=" 1:Numerical-Discrete\n 2:Numerical-Continuous\n 3:Ordinal\n 4:Categorical-Binary\n 5:Categorical-Multiclass",
 625                     validators=(wiz.required_validator, wiz.int_validator(1, 5)),
 626                     default='1'
 627                 ),
 628                 wiz.WizardStep(
 629                     # ID where the value will be stored
 630                     id="data_output_prob",
 631                     # Display name
 632                     name=Bcolors.HEADER+" Is the expected output data a probability value ? "+Bcolors.ENDC,
 633                     # Help message
 634                     help="Y/N",
 635                     validators=(wiz.required_validator, wiz.boolean_validator),
 636                     default='N'
 637                 ),
 638             )
 639         )
 640
 641
 642     def unsupervised_wizard(self):
 643         """
 644         The Un-Supervized Learning Wizard
 645         """
 646         self.wiz_unsupervised = wiz.PromptWizard(
 647             name=Bcolors.OKBLUE+"Understanding Goal, Metrics, Data and Output Type"+Bcolors.ENDC,
 648             description="",
 649             steps=(
 650                 # The list of input prompts to ask the user.
 651                 wiz.WizardStep(
 652                     # ID where the value will be stored
 653                     id="unsup_goal",
 654                     # Display name
 655                     name=Bcolors.HEADER+" What is the main goal? (Please type number associated with type in 'help')"+Bcolors.ENDC,
 656                     # Help message
 657                     help="1: Explore Similar Groups (clustering) \n 2: Perform Dimensionality Reduction\n 3: Others\n",
 658                     validators=(wiz.required_validator, wiz.int_validator(1, 3)),
 659                     default='1'
 660                 ),
 661                 wiz.WizardStep(
 662                     # ID where the value will be stored
 663                     id="unsup_dr_topic_mod",
 664                     # Display name
 665                     name=Bcolors.HEADER+" If dimensionality reduction, do you prefer topic modelling ? (Please type NA is you are not sure)"+Bcolors.ENDC,
 666                     # Help message
 667                     help="Y/N/NA",
 668                     validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na',
 669                                                                               'y','n','na','nA'])),
 670                     default='NA'
 671                 ),
 672                 wiz.WizardStep(
 673                     # ID where the value will be stored
 674                     id="unsup_clus_dv",
 675                     # Display name
 676                     name=Bcolors.HEADER+" Are you aware of density variations in your data ? (Please type NA is you are not sure)"+Bcolors.ENDC,
 677                     # Help message
 678                     help="Y/N/NA",
 679                     validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na',
 680                                                                               'y','n','na','nA'])),
 681                     default='NA'
 682                 ),
 683                 wiz.WizardStep(
 684                     # ID where the value will be stored
 685                     id="unsup_clus_outliers",
 686                     # Display name
 687                     name=Bcolors.HEADER+" Are there too many outliers in your data ? (Please type NA is you are not sure)"+Bcolors.ENDC,
 688                     # Help message
 689                     help="Y/N/NA",
 690                     validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na',
 691                                                                               'y','n','na','nA'])),
 692                     default='NA'
 693                 ),
 694                 wiz.WizardStep(
 695                     # ID where the value will be stored
 696                     id="unsup_clus_groups",
 697                     # Display name
 698                     name=Bcolors.HEADER+" If clustering, do you know how many groups to form? (Please type NA is you are not sure)"+Bcolors.ENDC,
 699                     # Help message
 700                     help="Y/N/NA",
 701                     validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na',
 702                                                                               'y','n','na','nA'])),
 703                     default='NA'
 704                 ),
 705
 706             )
 707         )
 708
 709     def reinforcement_wizard(self):
 710         """
 711         The Reinforced Learning Wizard
 712         """
 713         message = """
 714             Reward  |--------|
 715             |-------| Agent  |  Action
 716             | |-----|        |-------|
 717             | |     |--------|       |
 718             | |state                 |
 719             | |                      |
 720             | |    |-----------|     |
 721             | |----|Environment|     |
 722             |------|           |-----|
 723                    |-----------|
 724             """
 725         self.wiz_reinforcement = wiz.PromptWizard(
 726             name=Bcolors.OKBLUE+"Reinforcement Specific"+Bcolors.ENDC,
 727             description="",
 728             steps=(
 729                 # The list of input prompts to ask the user.
 730                 wiz.WizardStep(
 731                     # ID where the value will be stored
 732                     id="ri_info",
 733                     # Display name
 734                     name=Bcolors.HEADER+" Type help for reference diagram for reinforcement-learning"+Bcolors.ENDC,
 735                     # Help message
 736                     help=message,
 737                     validators=(wiz.required_validator),
 738                     default='Type Help or Press Enter'
 739                 ),
 740                 wiz.WizardStep(
 741                     # ID where the value will be stored
 742                     id="ri_model_preference",
 743                     # Display name
 744                     name=Bcolors.HEADER+" Do you prefer model-based approach? (Type NA if you are not sure) "+Bcolors.ENDC,
 745                     # Help message
 746                     help="Y/N/NA",
 747                     validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na',
 748                                                                               'y','n','na','nA'])),
 749                     default='Y'
 750                 ),
 751                 wiz.WizardStep(
 752                     # ID where the value will be stored
 753                     id="ri_model_availability",
 754                     # Display name
 755                     name=Bcolors.HEADER+" Do you have a model for model-based approach? (Type NA if not applicable) "+Bcolors.ENDC,
 756                     # Help message
 757                     help="Y/N/NA",
 758                     validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na',
 759                                                                               'y','n','na','nA'])),
 760                     default='Y'
 761                 ),
 762                 wiz.WizardStep(
 763                     # ID where the value will be stored
 764                     id="ri_modelfree_value",
 765                     # Display name
 766                     name=Bcolors.HEADER+" In Model-Free approach, do you prefer value-based approach? (Type NA if not applicable) "+Bcolors.ENDC,
 767                     # Help message
 768                     help="Y/N/NA",
 769                     validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na',
 770                                                                               'y','n','na','nA'])),
 771                     default='Y'
 772                 ),
 773                 wiz.WizardStep(
 774                     # ID where the value will be stored
 775                     id="ri_modelfree_value_state",
 776                     # Display name
 777                     name=Bcolors.HEADER+" In Model-Free Value-Based approach, do you prefer state-only model? (Type NA if not applicable) "+Bcolors.ENDC,
 778                     # Help message
 779                     help="Y/N/NA",
 780                     validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na',
 781                                                                               'y','n','na','nA'])),
 782                     default='Y'
 783                 ),
 784                 wiz.WizardStep(
 785                     # ID where the value will be stored
 786                     id="ri_app_domain",
 787                     # Display name
 788                     name=Bcolors.HEADER+" What is the application domain ? (Please type number associated with type in 'help') "+Bcolors.ENDC,
 789                     # Help message
 790                     help=" 1:Computer Resource Mgmt.\n 2:Robotics\n 3:Traffic-Control\n 4:Reccommenders\n 5:Autonomous Vehicles\n 6:Games\n 7:Chemistry\n 8:Others\n",
 791                     validators=(wiz.required_validator, wiz.int_validator(1, 8)),
 792                     default='1'
 793                 ),
 794             )
 795         )
 796
 797     ############### All the Run Operations ######################
 798     def run_mainwiz(self):
 799         """
 800         Run the Main Wizard
 801         """
 802         self.main_wizard_l1()
 803         self.main_l1_values = self.wiz_main_l1.run(self.shell)
 804         if self.main_l1_values['data_availability']:
 805             print("OK-1")
 806             self.main_wizard_l2_b()
 807             self.main_l2b_values = self.wiz_main_l2_b.run(self.shell)
 808             if self.main_l2b_values['data_label']:
 809                 self.supervised = True
 810             else:
 811                 self.unsupervised = True
 812             if self.main_l2b_values['data_programmability']:
 813                 print(Bcolors.FAIL+"ML is not required - Please consider alternate approaches\n"+Bcolors.ENDC)
 814             elif self.main_l2b_values['data_generation']:
 815                 print(Bcolors.OKGREEN+"Looks like you need ML, let's continue"+Bcolors.ENDC)
 816                 self.ml_needed = True
 817                 self.ml_gans = True
 818             else:
 819                 self.main_wizard_l3()
 820                 self.main_l3_values = self.wiz_main_l3.run(self.shell)
 821                 if self.main_l3_values['data_knowledge']:
 822                     print(Bcolors.OKGREEN+"Looks like you need ML, let's continue"+Bcolors.ENDC)
 823                     self.ml_needed = True
 824                 else:
 825                     self.main_wizard_l4()
 826                     self.main_l4_values = self.wiz_main_l4.run(self.shell)
 827                     if self.main_l4_values['data_pattern']:
 828                         print(Bcolors.OKGREEN+"Looks like you need ML, let's continue"+Bcolors.ENDC)
 829                         self.ml_needed = True
 830                     else:
 831                         print(Bcolors.FAIL+"ML is not required - Please consider alternate approaches\n"+Bcolors.ENDC)
 832         else:
 833             self.main_wizard_l2_a()
 834             self.main_l2a_values = self.wiz_main_l2_a.run(self.shell)
 835             if self.main_l2a_values['data_creativity']:
 836                 print(Bcolors.OKGREEN+"Looks like you need ML, let's continue"+Bcolors.ENDC)
 837                 self.ml_needed = True
 838                 self.reinforcement = True
 839             else:
 840                 print(Bcolors.FAIL+"ML is not required - Please consider alternate approaches\n"+Bcolors.ENDC)
 841
 842     def run_gans_wizard(self):
 843         """
 844         Run GANs wizard
 845         """
 846         self.gans_wizard()
 847         self.gans_values = self.wiz_gans.run(self.shell)
 848         if self.gans_values['gans_data_type']:
 849             if self.gans_values['gans_data_variables']:
 850                 print("GANs technique to consider: TTS-GAN")
 851             else:
 852                 print("GANs technique to consider: TimeGAN")
 853         else:
 854             print("Sorry. We need to discuss, please connect with Anuket Thoth Project <srao@linuxfoundation.org>")
 855
 856
 857
 858
 859     def run_generic_wizard(self):
 860         """
 861         Run Generic Wizard
 862         """
 863         self.gen_choice_wizard()
 864         self.gen_choice_values = self.wiz_generic_choice.run(self.shell)
 865         if self.gen_choice_values['data_metrics_pref']:
 866             self.gen_metrics_wizard()
 867             self.gen_metrics_values = self.wiz_generic_metrics.run(self.shell)
 868         if self.gen_choice_values['data_main']:
 869             self.gen_data_main_wizard()
 870             self.gen_data_main_values = self.wiz_generic_data_main.run(self.shell)
 871             if int(self.gen_data_main_values['data_column']) == 3:
 872                 self.gen_data_text_wizard()
 873                 self.gen_data_text_values = self.wiz_generic_data_text.run(self.shell)
 874             else:
 875                 self.gen_data_text_values = {'data_text_type': '3'}
 876             if int(self.gen_data_main_values['data_column']) == 1:
 877                 self.gen_data_features_wizard()
 878                 self.gen_data_features_values = self.wiz_generic_data_features.run(self.shell)
 879             else:
 880                 self.gen_data_features_values = {'data_features': 'Y',
 881                                                  'data_features_count': '10'}
 882             if int(self.gen_data_main_values['data_column']) == 2:
 883                 self.gen_data_signal_wizard()
 884                 self.gen_data_signal_values = self.wiz_generic_data_signal.run(self.shell)
 885             else:
 886                 self.gen_data_signal_values = {'data_signal_type': '1'}
 887         else:
 888             self.gen_data_main_values = {'data_column': '1'}
 889             print("Unknown Data Type")
 890         if self.gen_choice_values['data_databasic_pref']:
 891             self.gen_about_data_basic_wizard()
 892             self.gen_about_data_basic_values = self.wiz_generic_data_basic.run(self.shell)
 893         else:
 894             self.gen_about_data_basic_values = {'data_missing':'N',
 895                                                 'data_size_bytes': '1G',
 896                                                 'data_size_samples': '1M'}
 897         if self.gen_choice_values['data_dataadv_pref']:
 898             self.gen_about_data_advanced_wizard()
 899             self.gen_about_data_adv_values = self.wiz_generic_data_adv.run(self.shell)
 900         else:
 901             self.gen_about_data_adv_values = {'data_distribution': 'N',
 902                                               'data_io_relation': 'N',
 903                                               'data_correlation': 'N',
 904                                               'data_cond_indep': 'N'}
 905         if self.gen_choice_values['data_dataoutput_pref']:
 906             self.gen_about_output_wizard()
 907             self.gen_about_data_output_values = self.wiz_generic_data_output.run(self.shell)
 908         else:
 909             self.gen_about_data_output_values = {'data_type_output': '1',
 910                                                  'data_output_prob': 'N'}
 911
 912
 913     def run_unsupervised_wizard(self):
 914         """
 915         Run UnSupervised Learning Wizard.
 916         """
 917         self.unsupervised_wizard()
 918         self.unsup_values = self.wiz_unsupervised.run(self.shell)
 919
 920     def run_reinforcement_wizard(self):
 921         """
 922         Run Reinforced Learning Wizard
 923         """
 924         self.reinforcement_wizard()
 925         self.ri_values = self.wiz_reinforcement.run(self.shell)
 926
 927     def decide_unsupervised(self):
 928         """
 929         Decide which Unsupervised-learning to use
 930         """
 931         repro = False
 932         clus_prob = False
 933         if int(self.unsup_values['unsup_goal']) == 1:
 934             # Clustering
 935             if 'high' in self.data_size:
 936                 if not self.reproducibility:
 937                     clus_prob = True
 938                 else:
 939                     repro = True
 940             else:
 941                 if 'y' in self.unsup_values['unsup_clus_dv'].lower():
 942                     if 'y' in self.unsup_values['unsup_clus_groups'].lower():
 943                         clus_prob = True
 944                     else:
 945                         print("Unsupervised Learning model to consider: Hierarchical Clustering")
 946                         return
 947                 else:
 948                     repro = True
 949             if repro:
 950                 if 'y' in self.unsup_values['unsup_clus_outliers'].lower():
 951                     print("Unsupervised Learning model to consider: Hierarchical Clustering")
 952                 else:
 953                     print("Unsupervised Learning model to consider: DBSCAN")
 954                 return
 955             if clus_prob:
 956                 if 'y' in self.gen_about_data_output_values['data_output_prob'].lower():
 957                     print("Unsupervised Learning model to consider: Gaussian Mixture")
 958                 else:
 959                     print("Unsupervised Learning model to consider: KMeans")
 960                 return
 961         elif int(self.unsup_values['unsup_goal']) == 2:
 962             # Dimensionality Reduction
 963             if 'y' in self.unsup_values['unsup_dr_topic_mod'].lower():
 964                 if 'y' in self.gen_about_data_output_values['data_output_prob'].lower():
 965                     print("Unsupervised Learning model to consider: SVD")
 966                 else:
 967                     print("Unsupervised Learning model to consider: LDA")
 968             else:
 969                 print("Unsupervised Learning model to consider: PCA")
 970         else:
 971             print("Sorry. We need to discuss, please connect with Anuket Thoth Project <srao@linuxfoundation.org>")
 972
 973     def decide_reinforcement(self):
 974         """
 975         Decide which reinforement learning to use.
 976         """
 977         if (int(self.gen_about_data_output_values['data_type_output']) == 2 or
 978                 'y' in self.ri_values['ri_model_preference'].lower()):
 979             # Model Bsaed
 980             if 'y' in self.ri_values['ri_model_availability'].lower():
 981                 print("Reinforcement Learning model to consider - AlphaZero")
 982             else:
 983                 print("Reinforcement Learning models to consider - World Models, I2A, MBMF, and MBVE")
 984         elif 'n' in self.ri_values['ri_model_preference'].lower():
 985             # Model-Free based approach.
 986             if 'y' not in self.ri_values['ri_modelfree_value'].lower():
 987                 print("Reinforcement Learning models to consider: Policy Gradient and Actor Critic")
 988             else:
 989                 if 'y' in self.ri_values['ri_modelfree_value_state'].lower():
 990                     print("Reinforcement Learning models to consider - Monte Carlo, TD(0), and TD(Lambda)")
 991                 else:
 992                     print("Reinforcement Learning models to consider - SARSA, QLearning, Deep Queue Nets")
 993         else:
 994             # Default
 995             print("Sorry. We need to discuss, please connect with Anuket Thoth Project <srao@linuxfoundation.org>")
 996
 997     def perform_inference(self):
 998         """
 999         Perform Inferences. Used across all 3 types.
1000         """
1001         # Decide whether data is Low or High
1002         self.data_size = 'unknown'
1003         if ('k' in self.gen_about_data_basic_values['data_size_bytes'].lower() or
1004                 't' in self.gen_about_data_basic_values['data_size_samples']):
1005             self.data_size = 'low'
1006
1007         if int(self.gen_metrics_values['metric_interpretability']) >= 3 :
1008             self.interpretability = True
1009         if int(self.gen_metrics_values['metric_speed']) >= 3 :
1010             self.faster = True
1011         if int(self.gen_metrics_values['metric_reproducibility']) >= 3 :
1012             self.reproducibility = True
1013
1014         # Decide Features relative to Data (ftod_ratio) - high/low
1015         if ('k' in self.gen_about_data_basic_values['data_size_bytes'].lower() or
1016                 't' in self.gen_about_data_basic_values['data_size_samples']):
1017             if int(self.gen_data_features_values['data_features_count']) > 50:
1018                 self.ftod_ratio = 'high'
1019         elif ('m' in self.gen_about_data_basic_values['data_size_bytes'].lower() or
1020                 'm' in self.gen_about_data_basic_values['data_size_samples']):
1021             if int(self.gen_data_features_values['data_features_count']) > 5000:
1022                 self.ftod_ratio = 'high'
1023         else:
1024             if int(self.gen_data_features_values['data_features_count']) > 500000:
1025                 self.ftod_ratio = 'high'
1026
1027
1028     def decide_supervised(self):
1029         """
1030         Decide which Supervised learning to use.
1031         """
1032         if 'high' in self.data_size:
1033             # Cover: DT, RF, RNN, CNN, ANN and Naive Bayes
1034             if self.interpretability:
1035                 if self.faster:
1036                     print("Supervised Learning model to consider  - Decision Tree")
1037                 else:
1038                     print("Supervised Learning model to consider  - Random Forest")
1039             else:
1040                 if int(self.gen_data_main_values['data_column']) == 3:
1041                     print("Supervised Learning model to consider  - RNN")
1042                 elif (int(self.gen_data_main_values['data_column']) == 2 and
1043                         int(self.gen_data_signal_values['data_signal_type']) == 1):
1044                     print("Supervised Learning model to consider  - CNN")
1045                 elif (int(self.gen_data_main_values['data_column']) == 2 and
1046                         (int(self.gen_data_signal_values['data_signal_type']) == 2 or
1047                             int(self.gen_data_signal_values['data_signal_type']) == 3)):
1048                     if 'y' in self.gen_about_data_output_values['data_output_prob'].lower():
1049                         print("Supervised Learning model to consider  - Naive Bayes")
1050                     else:
1051                         print("Supervised Learning model to consider  - ANN")
1052                 else:
1053                     print("Supervised model to consider  Learning - ANN")
1054         elif 'low' in self.data_size:
1055             from_b = False
1056             # Cover: Regressions
1057             if 'high' in self.ftod_ratio:
1058                 from_b = True
1059             else:
1060                 print("Supervised Learning model to consider  - SVN with Gaussian Kernel")
1061                 return
1062             if int(self.gen_about_data_output_values['data_type_output']) != 2:
1063                 from_b = True
1064             else:
1065                 if 'y' in self.gen_about_data_adv_values['data_io_relation'].lower():
1066                     print("Supervised Learning model to consider  - Linear Regression or Linear SVM")
1067                 else:
1068                     print("Supervised Learning model to consider  - Polynomial Regression or nonLinear SVM")
1069                 return
1070             if from_b:
1071                 if int(self.gen_about_data_output_values['data_output_type']) == 4:
1072                     if 'y' in self.gen_about_data_output_values['data_output_prob'].lower():
1073                         if 'y' in self.gen_about_data_adv_values['data_cond_indep'].lower():
1074                             print("Supervised Learning model to consider  - Naive Bayes")
1075                         else:
1076                             if 'y' in self.gen_about_data_adv_values['data_correlation'].lower():
1077                                 print("Supervised Learning model to consider  - LASSO or Ridge Regression")
1078                             else:
1079                                 print("Supervised Learning model to consider  - Logistic Regression")
1080                     else:
1081                         print("Supervised Learning model to consider  - Polynomial Regression or nonLinear SVM")
1082
1083                 else:
1084                     print("Supervised Learning model to consider - KNN")
1085         else:
1086             # Default
1087             print("Sorry. We need to discuss, please connect with Anuket Thoth Project <srao@linuxfoundation.org>")
1088
1089     def ask_and_decide(self):
1090         """
1091         THe Main Engine
1092         """
1093         self.run_mainwiz()
1094         if self.ml_gans:
1095             self.run_gans_wizard()
1096             return
1097         if self.ml_needed:
1098             self.run_generic_wizard()
1099             if self.supervised:
1100                 self.decide_supervised()
1101             elif self.unsupervised:
1102                 self.run_unsupervised_wizard()
1103                 self.decide_unsupervised()
1104             elif self.reinforcement:
1105                 self.run_reinforcement_wizard()
1106                 self.decide_reinforcement()
1107
1108
def signal_handler(signum, frame):
    """
    Handle an interrupt signal.

    Tells the user that no suggestion will be given, prints the signal
    number and the interrupted frame, and terminates with exit status 0.
    """
    notice = "\n You interrupted, No Suggestion will be provided!"
    print(notice)
    print(signum, frame)
    # Same effect as sys.exit(0): both raise SystemExit with code 0.
    raise SystemExit(0)
1116
def main():
    """
    Create the wizard, run the question/decision flow and say goodbye.

    KeyboardInterrupt and MemoryError raised by the flow are reported
    with a short message; the closing message is printed in every case.
    """
    try:
        AlgoSelectorWizard().ask_and_decide()
    except (KeyboardInterrupt, MemoryError):
        print("Some Error Occured - No Suggestion can be provided")

    farewell = ("Thanks for using the Algoselector-Wizard, "
                "Hope our suggestion will be useful")
    print(farewell)
1129
# Script entry point: only runs when the file is executed directly.
if __name__ == "__main__":
    # Register the SIGINT handler before the interactive session starts.
    signal.signal(signal.SIGINT, signal_handler)
    main()