a3a0b0b6a4109590f3e7ad99267507df9595624c
[thoth.git] / tools / modelselector / modelselector.py
1 # Copyright 2022 Linux Foundation.
2 # srao@linuxfoundation.org
3 #
4 # Licensed under the Apache License, Version 2.0 (the "License");
5 # you may not use this file except in compliance with the License.
6 # You may obtain a copy of the License at
7 #
8 #   http://www.apache.org/licenses/LICENSE-2.0
9 #
10 # Unless required by applicable law or agreed to in writing, software
11 # distributed under the License is distributed on an "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 # See the License for the specific language governing permissions and
14 # limitations under the License.
15
16 """
17 Tool to suggest which ML approach is more applicable for
18 a particular data and usecase.
19 TODO:
20 1. Minimize code.
21 2. Add Informative data to the user.
22 3. Check for Size Entry - 1G/K ..
23 """
24
25 from __future__ import print_function
26 import signal
27 import sys
28 from pypsi import wizard as wiz
29 from pypsi.shell import Shell
30
31 # pylint: disable=line-too-long,too-few-public-methods,too-many-instance-attributes, too-many-nested-blocks, too-many-return-statements, too-many-branches
32
class Bcolors:
    """
    ANSI escape sequences used to colour and emphasise terminal text.

    Wrap a string as ``Bcolors.<NAME> + text + Bcolors.ENDC`` so the
    styling is switched off again after the text.
    """
    # Colour codes.
    HEADER = '\x1b[95m'
    OKBLUE = '\x1b[94m'
    OKGREEN = '\x1b[92m'
    WARNING = '\x1b[93m'
    FAIL = '\x1b[91m'
    # Reset code: ends whatever styling was started before it.
    ENDC = '\x1b[0m'
    # Text attributes.
    BOLD = '\x1b[1m'
    UNDERLINE = '\x1b[4m'
45
46 class AlgoSelectorWizard():
47     """
48     Class to create wizards
49     """
50     def __init__(self):
51         """
52         Perform Initialization.
53         """
54         self.shell = Shell()
55         # Set of all values from the user
56         self.main_values = {}
57         self.main_l1_values = {}
58         self.main_l2a_values = {}
59         self.main_l2b_values = {}
60         self.main_l3_values = {}
61         self.main_l4_values = {}
62         self.unsup_values = {}
63         self.ri_values = {}
64         self.gen_values = {}
65         self.gen_choice_values = {}
66         self.gen_metrics_values = {}
67         self.gen_data_main_values = {}
68         self.gen_data_text_values = {}
69         self.gen_data_features_values = {}
70         self.gen_data_signal_values = {}
71         self.gen_about_data_basic_values = {}
72         self.gen_about_data_adv_values = {}
73         self.gen_about_data_output_values = {}
74         # Set of Wizards.
75         self.wiz_main = None
76         self.wiz_main_l1 = None
77         self.wiz_main_l2_a = None
78         self.wiz_main_l2_b = None
79         self.wiz_main_l3 = None
80         self.wiz_main_l4 = None
81         self.wiz_generic = None
82         self.wiz_generic_choice = None
83         self.wiz_geneirc_metric = None
84         self.wiz_generic_data_main = None
85         self.wiz_generic_data_signal = None
86         self.wiz_generic_data_features = None
87         self.wiz_generic_data_text = None
88         self.wiz_generic_data_basic = None
89         self.wiz_generic_data_adv = None
90         self.wiz_generic_data_output = None
91         self.wiz_unsupervised = None
92         self.wiz_reinforcement = None
93         # Some Inferences
94         self.ml_needed = False
95         self.supervised = False
96         self.unsupervised = False
97         self.reinforcement = False
98         self.data_size = 'high'
99         self.interpretability = False
100         self.faster = False
101         self.ftod_ratio = 'low'
102         self.reproducibility = False
103
104
105     ############# All the Wizards ##################################
106
107     ### GENERIC Wizards - Need for ML ##############################
108     def main_wizard_l1(self):
109         """
110         The Main Wizard L1
111         """
112         self.wiz_main_l1 = wiz.PromptWizard(
113             name=Bcolors.OKBLUE+"Do you Need ML - Data Availability"+Bcolors.ENDC,
114             description="",
115             steps=(
116                 # The list of input prompts to ask the user.
117                 wiz.WizardStep(
118                     # ID where the value will be stored
119                     id="data_availability",
120                     # Display name
121                     name=Bcolors.HEADER+"Do you have access to data about different situations, or that describes a lot of examples of situations"+Bcolors.ENDC,
122                     # Help message
123                     help="Y/N - Yes/No",
124                     validators=(wiz.required_validator, wiz.boolean_validator),
125                     default='Y',
126                 ),
127             )
128         )
129
130     def main_wizard_l2_a(self):
131         """
132         The Main Wizard L2-A
133         """
134         self.wiz_main_l2_a = wiz.PromptWizard(
135             name=Bcolors.OKBLUE+"Do you Need ML - Data Creation"+Bcolors.ENDC,
136             description="",
137             steps=(
138                 # The list of input prompts to ask the user.
139                 wiz.WizardStep(
140                     # ID where the value will be stored
141                     id="data_creativity",
142                     # Display name
143                     name=Bcolors.HEADER+"Will a system be able to gather a lot of data by trying sequences of actions in many different situations and seeing the results"+Bcolors.ENDC,
144                     # Help message
145                     help="Y/N - Yes/No",
146                     validators=(wiz.required_validator, wiz.boolean_validator),
147                     default='Y',
148                 ),
149             )
150         )
151
152     def main_wizard_l2_b(self):
153         """
154         The Main Wizard L2-B
155         """
156         label = """ One or more meaningful and informative 'tag' to provide context so that a machine learning model can learn from it. For example, labels might indicate whether a photo contains a bird or car, which words were uttered in an audio recording, or if an x-ray contains a tumor. Data labeling is required for a variety of use cases including computer vision, natural language processing, and speech recognition."""
157         self.wiz_main_l2_b = wiz.PromptWizard(
158             name=Bcolors.OKBLUE+"Do you Need ML - Data Programmability"+Bcolors.ENDC,
159             description="",
160             steps=(
161                 # The list of input prompts to ask the user.
162                 wiz.WizardStep(
163                     # ID where the value will be stored
164                     id="data_label",
165                     # Display name
166                     name=Bcolors.HEADER+" Do you have Labelled data? (Type Y/N - Yes/No). Type help for description of label. "+Bcolors.ENDC,
167                     # Help message
168                     help=label,
169                     validators=(wiz.required_validator, wiz.boolean_validator),
170                     default='Y',
171                 ),
172                 wiz.WizardStep(
173                     # ID where the value will be stored
174                     id="data_programmability",
175                     # Display name
176                     name=Bcolors.HEADER+"Can a program or set of rules decide what actions to take based on the data you have about the situations"+Bcolors.ENDC,
177                     # Help message
178                     help="Y/N - Yes/No",
179                     validators=(wiz.required_validator, wiz.boolean_validator),
180                     default='N',
181                 ),
182             )
183         )
184
185
186     def main_wizard_l3(self):
187         """
188         The Main Wizard L3
189         """
190         self.wiz_main_l3 = wiz.PromptWizard(
191             name=Bcolors.OKBLUE+"Do you Need ML - Data Knowledge"+Bcolors.ENDC,
192             description="",
193             steps=(
194                 # The list of input prompts to ask the user.
195                 wiz.WizardStep(
196                     # ID where the value will be stored
197                     id="data_knowledge",
198                     # Display name
199                     name=Bcolors.HEADER+"Could a knowledgeable human decide what actions to take based on the data you have about the situations"+Bcolors.ENDC,
200                     # Help message
201                     help="Y/N - Yes/No",
202                     validators=(wiz.required_validator, wiz.boolean_validator),
203                     default='Y',
204                 ),
205             )
206         )
207
208     def main_wizard_l4(self):
209         """
210         The Main Wizard - L4
211         """
212         self.wiz_main_l4 = wiz.PromptWizard(
213             name=Bcolors.OKBLUE+"Do you Need ML - Data Pattern"+Bcolors.ENDC,
214             description="",
215             steps=(
216                 # The list of input prompts to ask the user.
217                 wiz.WizardStep(
218                     # ID where the value will be stored
219                     id="data_pattern",
220                     # Display name
221                     name=Bcolors.HEADER+"Could there be patterns in these situations that the humans haven't recognized before"+Bcolors.ENDC,
222                     # Help message
223                     help="Y/N - Yes/No.",
224                     validators=(wiz.required_validator, wiz.boolean_validator),
225                     default='Y'
226                 ),
227             )
228         )
229     ### GENERIC Wizards - GOAL, METRICS, DATA ##############################
230     def gen_choice_wizard(self):
231         """
232         Generic Wizard - Goal, metrics, data
233         """
234         self.wiz_generic_choice = wiz.PromptWizard(
235             name=Bcolors.OKBLUE+"Understanding Goal, and Preferences"+Bcolors.ENDC,
236             description="",
237             steps=(
238                 # The list of input prompts to ask the user.
239                 wiz.WizardStep(
240                     # ID where the value will be stored
241                     id="data_goal",
242                     # Display name
243                     name=Bcolors.HEADER+" What is your goal with the data? Predict, Describe or Explore"+Bcolors.ENDC,
244                     # Help message
245                     help="Enter one of Predict/Describe/Explore",
246                     validators=(wiz.required_validator, wiz.choice_validator(['Predict',
247                                                                               'predict',
248                                                                               'Describe',
249                                                                               'describe',
250                                                                               'Explore',
251                                                                               'explore'])),
252                     default='Explore'
253                 ),
254                 wiz.WizardStep(
255                     # ID where the value will be stored
256                     id="data_metrics_pref",
257                     # Display name
258                     name=Bcolors.HEADER+" Do you know which metrics (speed, accuracy, etc.) are more important for you? "+Bcolors.ENDC,
259                     # Help message
260                     help="Y/N - Yes/No",
261                     validators=(wiz.required_validator, wiz.boolean_validator),
262                     default='Y'
263                 ),
264                 wiz.WizardStep(
265                     # ID where the value will be stored
266                     id="data_main",
267                     # Display name
268                     name=Bcolors.HEADER+" Do you know about the input data type (If its signal/features/text)  ?  "+Bcolors.ENDC,
269                     # Help message
270                     help="Y/N - Yes/No",
271                     validators=(wiz.required_validator, wiz.boolean_validator),
272                     default='Y'
273                 ),
274                 wiz.WizardStep(
275                     # ID where the value will be stored
276                     id="data_databasic_pref",
277                     # Display name
278                     name=Bcolors.HEADER+" Do you have basic information (size, count, etc.) about the input data? "+Bcolors.ENDC,
279                     # Help message
280                     help="Y/N - Yes/No",
281                     validators=(wiz.required_validator, wiz.boolean_validator),
282                     default='Y'
283                 ),
284                 wiz.WizardStep(
285                     # ID where the value will be stored
286                     id="data_dataadv_pref",
287                     # Display name
288                     name=Bcolors.HEADER+" Do you have advanced information (distribution, relation, independency, etc.) about the input data? "+Bcolors.ENDC,
289                     # Help message
290                     help="Y/N - Yes/No",
291                     validators=(wiz.required_validator, wiz.boolean_validator),
292                     default='Y'
293                 ),
294                 wiz.WizardStep(
295                     # ID where the value will be stored
296                     id="data_dataoutput_pref",
297                     # Display name
298                     name=Bcolors.HEADER+" Do you have basic information (size, count, etc.) about the output? "+Bcolors.ENDC,
299                     # Help message
300                     help="Y/N - Yes/No",
301                     validators=(wiz.required_validator, wiz.boolean_validator),
302                     default='Y'
303                 ),
304             )
305         )
306
307     def gen_metrics_wizard(self):
308         """
309         Generic Wizard - Goal, metrics, data
310         """
311         self.wiz_generic_metrics = wiz.PromptWizard(
312             name=Bcolors.OKBLUE+"Understanding Goal, Metrics, Data and Output Type"+Bcolors.ENDC,
313             description="",
314             steps=(
315                 # The list of input prompts to ask the user.
316                 wiz.WizardStep(
317                     # ID where the value will be stored
318                     id="metric_accuracy",
319                     # Display name
320                     name=Bcolors.HEADER+" How important the metric 'Accuracy' is for you? 1-5: 1- Least important 5- Most Important"+Bcolors.ENDC,
321                     # Help message
322                     help="Enter 1-5: 1 being least important, and 5 being most important",
323                     validators=(wiz.required_validator, wiz.int_validator(1, 5)),
324                     default='1'
325                 ),
326                 wiz.WizardStep(
327                     # ID where the value will be stored
328                     id="metric_speed",
329                     # Display name
330                     name=Bcolors.HEADER+" How important the metric 'Speed' is for you? 1-5: 1- Least important 5- Most Important"+Bcolors.ENDC,
331                     # Help message
332                     help="Enter 1-5: 1 being least important, and 5 being most important",
333                     validators=(wiz.required_validator, wiz.int_validator(1, 5)),
334                     default='1'
335                 ),
336                 wiz.WizardStep(
337                     # ID where the value will be stored
338                     id="metric_interpretability",
339                     # Display name
340                     name=Bcolors.HEADER+" How important the metric 'Interpretability' is for you? 1-5: 1- Least important 5- Most Important"+Bcolors.ENDC,
341                     # Help message
342                     help="Enter 1-5: 1 being least important, and 5 being most important",
343                     validators=(wiz.required_validator, wiz.int_validator(1, 5)),
344                     default='1'
345                 ),
346                 wiz.WizardStep(
347                     # ID where the value will be stored
348                     id="metric_reproducibility",
349                     # Display name
350                     name=Bcolors.HEADER+" How important the metric 'Reproducibility' is for you? 1-5: 1- Least important 5- Most Important"+Bcolors.ENDC,
351                     # Help message
352                     help="Enter 1-5: 1 being least important, and 5 being most important",
353                     validators=(wiz.required_validator, wiz.int_validator(1, 5)),
354                     default='1'
355                 ),
356                 wiz.WizardStep(
357                     # ID where the value will be stored
358                     id="metric_implementation",
359                     # Display name
360                     name=Bcolors.HEADER+" How important the metric 'Ease of Implementation and Maintenance' is for you? 1-5: 1- Least important 5- Most Important"+Bcolors.ENDC,
361                     # Help message
362                     help="Enter 1-5: 1 being least important, and 5 being most important",
363                     validators=(wiz.required_validator, wiz.int_validator(1, 5)),
364                     default='1'
365                 ),
366             )
367         )
368
369     def gen_data_main_wizard(self):
370         """
371         Generic Wizard - Goal, metrics, data
372         """
373         self.wiz_generic_data_main = wiz.PromptWizard(
374             name=Bcolors.OKBLUE+"Understanding Goal, and Preferences"+Bcolors.ENDC,
375             description="",
376             steps=(
377                 # The list of input prompts to ask the user.
378                 wiz.WizardStep(
379                     # ID where the value will be stored
380                     id="data_column",
381                     # Display name
382                     name=Bcolors.HEADER+" What does the data (columns) represent? Please type help and select the associated number"+Bcolors.ENDC,
383                     # Help message
384                     help="1. Well Defined Features\n 2. Signals - Timeseries, pixels, etc\n 3. Text - Unstructured\n 4. None of the above\n",
385                     validators=(wiz.required_validator, wiz.int_validator(1, 4)),
386                     default='1'
387                 ),
388             )
389         )
390
391     def gen_data_signal_wizard(self):
392         """
393         Generic Wizard - Goal, metrics, data
394         """
395         self.wiz_generic_data_signal = wiz.PromptWizard(
396             name=Bcolors.OKBLUE+"Understanding Goal, and Preferences"+Bcolors.ENDC,
397             description="",
398             steps=(
399                 # The list of input prompts to ask the user.
400                 wiz.WizardStep(
401                     # ID where the value will be stored
402                     id="data_signal_type",
403                     # Display name
404                     name=Bcolors.HEADER+" If Signals, can you choose any one from the below list? Please type help for list "+Bcolors.ENDC,
405                     # Help message
406                     help="1. Image\n 2. Audio\n 3. Timeseries\n 4. None of the above\n 5. Not Applicable\n  ",
407                     validators=(wiz.required_validator, wiz.int_validator(1, 5)),
408                     default='3'
409                 ),
410             )
411         )
412
413     def gen_data_features_wizard(self):
414         """
415         Generic Wizard - Goal, metrics, data
416         """
417         self.wiz_generic_data_features = wiz.PromptWizard(
418             name=Bcolors.OKBLUE+"Understanding Goal, and Preferences"+Bcolors.ENDC,
419             description="",
420             steps=(
421                 # The list of input prompts to ask the user.
422                 wiz.WizardStep(
423                     # ID where the value will be stored
424                     id="data_features",
425                     # Display name
426                     name=Bcolors.HEADER+" If features, are they well defined? i.e., are all the variables well understood? "+Bcolors.ENDC,
427                     # Help message
428                     help="Y/N",
429                     validators=(wiz.required_validator, wiz.boolean_validator),
430                     default='Y'
431                 ),
432                 wiz.WizardStep(
433                     # ID where the value will be stored
434                     id="data_features_count",
435                     # Display name
436                     name=Bcolors.HEADER+" If features, How many are there? "+Bcolors.ENDC,
437                     # Help message
438                     help="Number only - Approximate should be OK.",
439                     validators=(wiz.required_validator, wiz.int_validator(1, 100000)),
440                     default='10'
441                 ),
442             )
443         )
444
445     def gen_data_text_wizard(self):
446         """
447         Generic Wizard - Goal, metrics, data
448         """
449         self.wiz_generic_data_text = wiz.PromptWizard(
450             name=Bcolors.OKBLUE+"Understanding Goal, and Preferences"+Bcolors.ENDC,
451             description="",
452             steps=(
453                 # The list of input prompts to ask the user.
454                 wiz.WizardStep(
455                     # ID where the value will be stored
456                     id="data_text_type",
457                     # Display name
458                     name=Bcolors.HEADER+" If Text, can you choose any one from the below list? Please type help for list"+Bcolors.ENDC,
459                     # Help message
460                     help="1. Webpages\n 2. Emails\n 3. Social-Media Posts\n 4. Books\n 5. Formal Articles\n 6. Speech converted to text\n 7. None of the above\n 8. Not Applicable\n  ",
461                     validators=(wiz.required_validator, wiz.int_validator(1, 8)),
462                     default='3'
463                 ),
464
465             )
466         )
467
468     def gen_about_data_basic_wizard(self):
469         """
470         Generic Wizard - Goal, metrics, data
471         """
472         self.wiz_generic_data_basic = wiz.PromptWizard(
473             name=Bcolors.OKBLUE+"Understanding Basic Input Data Information"+Bcolors.ENDC,
474             description="",
475             steps=(
476                 # The list of input prompts to ask the user.
477                 wiz.WizardStep(
478                     # ID where the value will be stored
479                     id="data_missing",
480                     # Display name
481                     name=Bcolors.HEADER+" Are there any missing values in the data? "+Bcolors.ENDC,
482                     # Help message
483                     help="Y/N",
484                     validators=(wiz.required_validator, wiz.boolean_validator),
485                     default='N'
486                 ),
487                 wiz.WizardStep(
488                     # ID where the value will be stored
489                     id="data_size_bytes",
490                     # Display name
491                     name=Bcolors.HEADER+" How big is the data in terms of size? (Use K/M/G Bytes unit) "+Bcolors.ENDC,
492                     # Help message
493                     help="Number(integer) and unit: K for Kilo, M for Mega and G for Giga. Ex: 10G for 10 Giga bytes",
494                     validators=(wiz.required_validator),
495                     default='1G'
496                 ),
497                 wiz.WizardStep(
498                     # ID where the value will be stored
499                     id="data_size_samples",
500                     # Display name
501                     name=Bcolors.HEADER+" How big is the data in terms of samples? (Use T/M/B Samples) "+Bcolors.ENDC,
502                     # Help message
503                     help="Number(integer) and unit: T for Thousand, M for Million and B for Billion. Ex: 1M for 1 Million Samples",
504                     validators=(wiz.required_validator),
505                     default='1M'
506                 ),
507             )
508         )
509
510     def gen_about_data_advanced_wizard(self):
511         """
512         Generic Wizard - Goal, metrics, data
513         """
514         self.wiz_generic_data_adv = wiz.PromptWizard(
515             name=Bcolors.OKBLUE+"Understanding Advanced Input Data Information"+Bcolors.ENDC,
516             description="",
517             steps=(
518                 # The list of input prompts to ask the user.
519                 wiz.WizardStep(
520                     # ID where the value will be stored
521                     id="data_distribution",
522                     # Display name
523                     name=Bcolors.HEADER+" Are you aware of any 'Distribution' that is inherent to the data, we can take advantage of?"+Bcolors.ENDC,
524                     # Help message
525                     help="Y/N - Yes",
526                     validators=(wiz.required_validator, wiz.boolean_validator),
527                     default='Y'
528                 ),
529                 wiz.WizardStep(
530                     # ID where the value will be stored
531                     id="data_io_relation",
532                     # Display name
533                     name=Bcolors.HEADER+" Is the probability of 'Linear Relation' between input and the output is high?"+Bcolors.ENDC,
534                     # Help message
535                     help="Y/N - Yes/No",
536                     validators=(wiz.required_validator, wiz.boolean_validator),
537                     default='Y'
538                 ),
539                 wiz.WizardStep(
540                     # ID where the value will be stored
541                     id="data_correlation",
542                     # Display name
543                     name=Bcolors.HEADER+" Are you confident that there is NO high correlation among the independent variables in your day?"+Bcolors.ENDC,
544                     # Help message
545                     help="Y/N/ - Yes/No ",
546                     validators=(wiz.required_validator, wiz.boolean_validator),
547                     default='Y'
548                 ),
549                 wiz.WizardStep(
550                     # ID where the value will be stored
551                     id="data_cond_indep",
552                     # Display name
553                     name=Bcolors.HEADER+" Are you confident that the variables are conditionally independent?"+Bcolors.ENDC,
554                     # Help message
555                     help="Y/N/. If probability that it rains given lightining and thunder is same as probability that it rains given lightining, then rain and thunder are conditionally independent",
556                     validators=(wiz.required_validator, wiz.boolean_validator),
557                     default='Y'
558                 ),
559             )
560         )
561
562     def gen_about_output_wizard(self):
563         """
564         Generic Wizard - Goal, metrics, data
565         """
566         self.wiz_generic_data_output = wiz.PromptWizard(
567             name=Bcolors.OKBLUE+"Understanding Data Output"+Bcolors.ENDC,
568             description="",
569             steps=(
570                 # The list of input prompts to ask the user.        
571                 wiz.WizardStep(
572                     # ID where the value will be stored
573                     id="data_type_output",
574                     # Display name
575                     name=Bcolors.HEADER+" What is the expected output data type ? (Please type number associated with type in 'help') "+Bcolors.ENDC,
576                     # Help message
577                     help=" 1:Numerical-Discrete\n 2:Numerical-Continuous\n 3:Ordinal\n 4:Categorical-Binary\n 5:Categorical-Multiclass",
578                     validators=(wiz.required_validator, wiz.int_validator(1, 5)),
579                     default='1'
580                 ),
581                 wiz.WizardStep(
582                     # ID where the value will be stored
583                     id="data_output_prob",
584                     # Display name
585                     name=Bcolors.HEADER+" Is the expected output data a probability value ? "+Bcolors.ENDC,
586                     # Help message
587                     help="Y/N",
588                     validators=(wiz.required_validator, wiz.boolean_validator),
589                     default='N'
590                 ),
591             )
592         )
593
594
595     def unsupervised_wizard(self):
596         """
597         The Un-Supervized Learning Wizard
598         """
599         self.wiz_unsupervised = wiz.PromptWizard(
600             name=Bcolors.OKBLUE+"Understanding Goal, Metrics, Data and Output Type"+Bcolors.ENDC,
601             description="",
602             steps=(
603                 # The list of input prompts to ask the user.
604                 wiz.WizardStep(
605                     # ID where the value will be stored
606                     id="unsup_goal",
607                     # Display name
608                     name=Bcolors.HEADER+" What is the main goal? (Please type number associated with type in 'help')"+Bcolors.ENDC,
609                     # Help message
610                     help="1: Explore Similar Groups (clustering) \n 2: Perform Dimensionality Reduction\n 3: Others\n",
611                     validators=(wiz.required_validator, wiz.int_validator(1, 3)),
612                     default='1'
613                 ),
614                 wiz.WizardStep(
615                     # ID where the value will be stored
616                     id="unsup_dr_topic_mod",
617                     # Display name
618                     name=Bcolors.HEADER+" If dimensionality reduction, do you prefer topic modelling ? (Please type NA is you are not sure)"+Bcolors.ENDC,
619                     # Help message
620                     help="Y/N/NA",
621                     validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na',
622                                                                               'y','n','na','nA'])),
623                     default='NA'
624                 ),
625                 wiz.WizardStep(
626                     # ID where the value will be stored
627                     id="unsup_clus_dv",
628                     # Display name
629                     name=Bcolors.HEADER+" Are you aware of density variations in your data ? (Please type NA is you are not sure)"+Bcolors.ENDC,
630                     # Help message
631                     help="Y/N/NA",
632                     validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na',
633                                                                               'y','n','na','nA'])),
634                     default='NA'
635                 ),
636                 wiz.WizardStep(
637                     # ID where the value will be stored
638                     id="unsup_clus_outliers",
639                     # Display name
640                     name=Bcolors.HEADER+" Are there too many outliers in your data ? (Please type NA is you are not sure)"+Bcolors.ENDC,
641                     # Help message
642                     help="Y/N/NA",
643                     validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na',
644                                                                               'y','n','na','nA'])),
645                     default='NA'
646                 ),
647                 wiz.WizardStep(
648                     # ID where the value will be stored
649                     id="unsup_clus_groups",
650                     # Display name
651                     name=Bcolors.HEADER+" If clustering, do you know how many groups to form? (Please type NA is you are not sure)"+Bcolors.ENDC,
652                     # Help message
653                     help="Y/N/NA",
654                     validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na',
655                                                                               'y','n','na','nA'])),
656                     default='NA'
657                 ),
658
659             )
660         )
661
662     def reinforcement_wizard(self):
663         """
664         The Reinforced Learning Wizard
665         """
666         message = """
667             Reward  |--------|
668             |-------| Agent  |  Action
669             | |-----|        |-------|
670             | |     |--------|       |
671             | |state                 |
672             | |                      |
673             | |    |-----------|     |
674             | |----|Environment|     |
675             |------|           |-----|
676                    |-----------|
677             """
678         self.wiz_reinforcement = wiz.PromptWizard(
679             name=Bcolors.OKBLUE+"Reinforcement Specific"+Bcolors.ENDC,
680             description="",
681             steps=(
682                 # The list of input prompts to ask the user.
683                 wiz.WizardStep(
684                     # ID where the value will be stored
685                     id="ri_info",
686                     # Display name
687                     name=Bcolors.HEADER+" Type help for reference diagram for reinforcement-learning"+Bcolors.ENDC,
688                     # Help message
689                     help=message,
690                     validators=(wiz.required_validator),
691                     default='Type Help or Press Enter'
692                 ),
693                 wiz.WizardStep(
694                     # ID where the value will be stored
695                     id="ri_model_preference",
696                     # Display name
697                     name=Bcolors.HEADER+" Do you prefer model-based approach? (Type NA if you are not sure) "+Bcolors.ENDC,
698                     # Help message
699                     help="Y/N/NA",
700                     validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na',
701                                                                               'y','n','na','nA'])),
702                     default='Y'
703                 ),
704                 wiz.WizardStep(
705                     # ID where the value will be stored
706                     id="ri_model_availability",
707                     # Display name
708                     name=Bcolors.HEADER+" Do you have a model for model-based approach? (Type NA if not applicable) "+Bcolors.ENDC,
709                     # Help message
710                     help="Y/N/NA",
711                     validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na',
712                                                                               'y','n','na','nA'])),
713                     default='Y'
714                 ),
715                 wiz.WizardStep(
716                     # ID where the value will be stored
717                     id="ri_modelfree_value",
718                     # Display name
719                     name=Bcolors.HEADER+" In Model-Free approach, do you prefer value-based approach? (Type NA if not applicable) "+Bcolors.ENDC,
720                     # Help message
721                     help="Y/N/NA",
722                     validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na',
723                                                                               'y','n','na','nA'])),
724                     default='Y'
725                 ),
726                 wiz.WizardStep(
727                     # ID where the value will be stored
728                     id="ri_modelfree_value_state",
729                     # Display name
730                     name=Bcolors.HEADER+" In Model-Free Value-Based approach, do you prefer state-only model? (Type NA if not applicable) "+Bcolors.ENDC,
731                     # Help message
732                     help="Y/N/NA",
733                     validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na',
734                                                                               'y','n','na','nA'])),
735                     default='Y'
736                 ),
737                 wiz.WizardStep(
738                     # ID where the value will be stored
739                     id="ri_app_domain",
740                     # Display name
741                     name=Bcolors.HEADER+" What is the application domain ? (Please type number associated with type in 'help') "+Bcolors.ENDC,
742                     # Help message
743                     help=" 1:Computer Resource Mgmt.\n 2:Robotics\n 3:Traffic-Control\n 4:Reccommenders\n 5:Autonomous Vehicles\n 6:Games\n 7:Chemistry\n 8:Others\n",
744                     validators=(wiz.required_validator, wiz.int_validator(1, 8)),
745                     default='1'
746                 ),
747             )
748         )
749
750     ############### All the Run Operations ######################
751     def run_mainwiz(self):
752         """
753         Run the Main Wizard
754         """
755         self.main_wizard_l1()
756         self.main_l1_values = self.wiz_main_l1.run(self.shell)
757         if self.main_l1_values['data_availability']:
758             print("OK-1")
759             self.main_wizard_l2_b()
760             self.main_l2b_values = self.wiz_main_l2_b.run(self.shell)
761             if self.main_l2b_values['data_label']:
762                 self.supervised = True
763             else:
764                 self.unsupervised = True
765             if self.main_l2b_values['data_programmability']:
766                 print(Bcolors.FAIL+"ML is not required - Please consider alternate approaches\n"+Bcolors.ENDC)
767             else:
768                 self.main_wizard_l3()
769                 self.main_l3_values = self.wiz_main_l3.run(self.shell)
770                 if self.main_l3_values['data_knowledge']:
771                     print(Bcolors.OKGREEN+"Looks like you need ML, let's continue"+Bcolors.ENDC)
772                     self.ml_needed = True
773                 else:
774                     self.main_wizard_l4()
775                     self.main_l4_values = self.wiz_main_l4.run(self.shell)
776                     if self.main_l4_values['data_pattern']:
777                         print(Bcolors.OKGREEN+"Looks like you need ML, let's continue"+Bcolors.ENDC)
778                         self.ml_needed = True
779                     else:
780                         print(Bcolors.FAIL+"ML is not required - Please consider alternate approaches\n"+Bcolors.ENDC)
781         else:
782             self.main_wizard_l2_a()
783             self.main_l2a_values = self.wiz_main_l2_a.run(self.shell)
784             if self.main_l2a_values['data_creativity']:
785                 print(Bcolors.OKGREEN+"Looks like you need ML, let's continue"+Bcolors.ENDC)
786                 self.ml_needed = True
787                 self.reinforcement = True
788             else:
789                 print(Bcolors.FAIL+"ML is not required - Please consider alternate approaches\n"+Bcolors.ENDC)
790
791     def run_generic_wizard(self):
792         """
793         Run Generic Wizard
794         """
795         self.gen_choice_wizard()
796         self.gen_choice_values = self.wiz_generic_choice.run(self.shell)
797         if self.gen_choice_values['data_metrics_pref']:
798             self.gen_metrics_wizard()
799             self.gen_metrics_values = self.wiz_generic_metrics.run(self.shell)
800         if self.gen_choice_values['data_main']:
801             self.gen_data_main_wizard()
802             self.gen_data_main_values = self.wiz_generic_data_main.run(self.shell)
803             if int(self.gen_data_main_values['data_column']) == 3:
804                 self.gen_data_text_wizard()
805                 self.gen_data_text_values = self.wiz_generic_data_text.run(self.shell)
806             else:
807                 self.gen_data_text_values = {'data_text_type': '3'}
808             if int(self.gen_data_main_values['data_column']) == 1:
809                 self.gen_data_features_wizard()
810                 self.gen_data_features_values = self.wiz_generic_data_features.run(self.shell)
811             else:
812                 self.gen_data_features_values = {'data_features': 'Y',
813                                                  'data_features_count': '10'}
814             if int(self.gen_data_main_values['data_column']) == 2:
815                 self.gen_data_signal_wizard()
816                 self.gen_data_signal_values = self.wiz_generic_data_signal.run(self.shell)
817             else:
818                 self.gen_data_signal_values = {'data_signal_type': '1'}
819         else:
820             self.gen_data_main_values = {'data_column': '1'}
821             print("Unknown Data Type")
822         if self.gen_choice_values['data_databasic_pref']:
823             self.gen_about_data_basic_wizard()
824             self.gen_about_data_basic_values = self.wiz_generic_data_basic.run(self.shell)
825         else:
826             self.gen_about_data_basic_values = {'data_missing':'N',
827                                                 'data_size_bytes': '1G',
828                                                 'data_size_samples': '1M'}
829         if self.gen_choice_values['data_dataadv_pref']:
830             self.gen_about_data_advanced_wizard()
831             self.gen_about_data_adv_values = self.wiz_generic_data_adv.run(self.shell)
832         else:
833             self.gen_about_data_adv_values = {'data_distribution': 'N',
834                                               'data_io_relation': 'N',
835                                               'data_correlation': 'N',
836                                               'data_cond_indep': 'N'}
837         if self.gen_choice_values['data_dataoutput_pref']:
838             self.gen_about_output_wizard()
839             self.gen_about_data_output_values = self.wiz_generic_data_output.run(self.shell)
840         else:
841             self.gen_about_data_output_values = {'data_type_output': '1',
842                                                  'data_output_prob': 'N'}
843
844
845     def run_unsupervised_wizard(self):
846         """
847         Run UnSupervised Learning Wizard.
848         """
849         self.unsupervised_wizard()
850         self.unsup_values = self.wiz_unsupervised.run(self.shell)
851
852     def run_reinforcement_wizard(self):
853         """
854         Run Reinforced Learning Wizard
855         """
856         self.reinforcement_wizard()
857         self.ri_values = self.wiz_reinforcement.run(self.shell)
858
859     def decide_unsupervised(self):
860         """
861         Decide which Unsupervised-learning to use
862         """
863         repro = False
864         clus_prob = False
865         if int(self.unsup_values['unsup_goal']) == 1:
866             # Clustering
867             if 'high' in self.data_size:
868                 if not self.reproducibility:
869                     clus_prob = True
870                 else:
871                     repro = True
872             else:
873                 if 'y' in self.unsup_values['unsup_clus_dv'].lower():
874                     if 'y' in self.unsup_values['unsup_clus_groups'].lower():
875                         clus_prob = True
876                     else:
877                         print("Unsupervised Learning model to consider: Hierarchical Clustering")
878                         return
879                 else:
880                     repro = True
881             if repro:
882                 if 'y' in self.unsup_values['unsup_clus_outliers'].lower():
883                     print("Unsupervised Learning model to consider: Hierarchical Clustering")
884                 else:
885                     print("Unsupervised Learning model to consider: DBSCAN")
886                 return
887             if clus_prob:
888                 if 'y' in self.gen_about_data_output_values['data_output_prob'].lower():
889                     print("Unsupervised Learning model to consider: Gaussian Mixture")
890                 else:
891                     print("Unsupervised Learning model to consider: KMeans")
892                 return
893         elif int(self.unsup_values['unsup_goal']) == 2:
894             # Dimensionality Reduction
895             if 'y' in self.unsup_values['unsup_dr_topic_mod'].lower():
896                 if 'y' in self.gen_about_data_output_values['data_output_prob'].lower():
897                     print("Unsupervised Learning model to consider: SVD")
898                 else:
899                     print("Unsupervised Learning model to consider: LDA")
900             else:
901                 print("Unsupervised Learning model to consider: PCA")
902         else:
903             print("Sorry. We need to discuss, please connect with Anuket Thoth Project <sridhar.rao@spirent.com>")
904
905     def decide_reinforcement(self):
906         """
907         Decide which reinforement learning to use.
908         """
909         if (int(self.gen_about_data_output_values['data_type_output']) == 2 or
910                 'y' in self.ri_values['ri_model_preference'].lower()):
911             # Model Bsaed
912             if 'y' in self.ri_values['ri_model_availability'].lower():
913                 print("Reinforcement Learning model to consider - AlphaZero")
914             else:
915                 print("Reinforcement Learning models to consider - World Models, I2A, MBMF, and MBVE")
916         elif 'n' in self.ri_values['ri_model_preference'].lower():
917             # Model-Free based approach.
918             if 'y' not in self.ri_values['ri_modelfree_value'].lower():
919                 print("Reinforcement Learning models to consider: Policy Gradient and Actor Critic")
920             else:
921                 if 'y' in self.ri_values['ri_modelfree_value_state'].lower():
922                     print("Reinforcement Learning models to consider - Monte Carlo, TD(0), and TD(Lambda)")
923                 else:
924                     print("Reinforcement Learning models to consider - SARSA, QLearning, Deep Queue Nets")
925         else:
926             # Default
927             print("Sorry. We need to discuss, please connect with Anuket Thoth Project <srao@linuxfoundation.org>")
928
929     def perform_inference(self):
930         """
931         Perform Inferences. Used across all 3 types.
932         """
933         # Decide whether data is Low or High
934         self.data_size = 'unknown'
935         if ('k' in self.gen_about_data_basic_values['data_size_bytes'].lower() or
936                 't' in self.gen_about_data_basic_values['data_size_samples']):
937             self.data_size = 'low'
938
939         if int(self.gen_metrics_values['metric_interpretability']) >= 3 :
940             self.interpretability = True
941         if int(self.gen_metrics_values['metric_speed']) >= 3 :
942             self.faster = True
943         if int(self.gen_metrics_values['metric_reproducibility']) >= 3 :
944             self.reproducibility = True
945
946         # Decide Features relative to Data (ftod_ratio) - high/low
947         if ('k' in self.gen_about_data_basic_values['data_size_bytes'].lower() or
948                 't' in self.gen_about_data_basic_values['data_size_samples']):
949             if int(self.gen_data_features_values['data_features_count']) > 50:
950                 self.ftod_ratio = 'high'
951         elif ('m' in self.gen_about_data_basic_values['data_size_bytes'].lower() or
952                 'm' in self.gen_about_data_basic_values['data_size_samples']):
953             if int(self.gen_data_features_values['data_features_count']) > 5000:
954                 self.ftod_ratio = 'high'
955         else:
956             if int(self.gen_data_features_values['data_features_count']) > 500000:
957                 self.ftod_ratio = 'high'
958
959
960     def decide_supervised(self):
961         """
962         Decide which Supervised learning to use.
963         """
964         if 'high' in self.data_size:
965             # Cover: DT, RF, RNN, CNN, ANN and Naive Bayes
966             if self.interpretability:
967                 if self.faster:
968                     print("Supervised Learning model to consider  - Decision Tree")
969                 else:
970                     print("Supervised Learning model to consider  - Random Forest")
971             else:
972                 if int(self.gen_data_main_values['data_column']) == 3:
973                     print("Supervised Learning model to consider  - RNN")
974                 elif (int(self.gen_data_main_values['data_column']) == 2 and
975                         int(self.gen_data_signal_values['data_signal_type']) == 1):
976                     print("Supervised Learning model to consider  - CNN")
977                 elif (int(self.gen_data_main_values['data_column']) == 2 and
978                         (int(self.gen_data_signal_values['data_signal_type']) == 2 or
979                             int(self.gen_data_signal_values['data_signal_type']) == 3)):
980                     if 'y' in self.gen_about_data_output_values['data_output_prob'].lower():
981                         print("Supervised Learning model to consider  - Naive Bayes")
982                     else:
983                         print("Supervised Learning model to consider  - ANN")
984                 else:
985                     print("Supervised model to consider  Learning - ANN")
986         elif 'low' in self.data_size:
987             from_b = False
988             # Cover: Regressions
989             if 'high' in self.ftod_ratio:
990                 from_b = True
991             else:
992                 print("Supervised Learning model to consider  - SVN with Gaussian Kernel")
993                 return
994             if int(self.gen_about_data_output_values['data_type_output']) != 2:
995                 from_b = True
996             else:
997                 if 'y' in self.gen_about_data_adv_values['data_io_relation'].lower():
998                     print("Supervised Learning model to consider  - Linear Regression or Linear SVM")
999                 else:
1000                     print("Supervised Learning model to consider  - Polynomial Regression or nonLinear SVM")
1001                 return
1002             if from_b:
1003                 if int(self.gen_about_data_output_values['data_output_type']) == 4:
1004                     if 'y' in self.gen_about_data_output_values['data_output_prob'].lower():
1005                         if 'y' in self.gen_about_data_adv_values['data_cond_indep'].lower():
1006                             print("Supervised Learning model to consider  - Naive Bayes")
1007                         else:
1008                             if 'y' in self.gen_about_data_adv_values['data_correlation'].lower():
1009                                 print("Supervised Learning model to consider  - LASSO or Ridge Regression")
1010                             else:
1011                                 print("Supervised Learning model to consider  - Logistic Regression")
1012                     else:
1013                         print("Supervised Learning model to consider  - Polynomial Regression or nonLinear SVM")
1014
1015                 else:
1016                     print("Supervised Learning model to consider - KNN")
1017         else:
1018             # Default
1019             print("Sorry. We need to discuss, please connect with Anuket Thoth Project <sridhar.rao@spirent.com>")
1020
1021     def ask_and_decide(self):
1022         """
1023         THe Main Engine
1024         """
1025         self.run_mainwiz()
1026         if self.ml_needed:
1027             self.run_generic_wizard()
1028             if self.supervised:
1029                 self.decide_supervised()
1030             elif self.unsupervised:
1031                 self.run_unsupervised_wizard()
1032                 self.decide_unsupervised()
1033             elif self.reinforcement:
1034                 self.run_reinforcement_wizard()
1035                 self.decide_reinforcement()
1036
1037
def signal_handler(signum, frame):
    """
    Handle an interrupt signal: tell the user, then leave with status 0.

    ``signum`` and ``frame`` are the arguments passed to a handler
    registered through ``signal.signal``; both are echoed to stdout.
    """
    notice = "\n You interrupted, No Suggestion will be provided!"
    print(notice)
    print(signum, frame)
    # Same effect as sys.exit(0): unwind with a zero exit status.
    raise SystemExit(0)
1045
def main():
    """
    Create the selector wizard, run one session and print a closing line.

    A KeyboardInterrupt or MemoryError raised during the session is
    reported with a short message; the closing line is printed either way.
    """
    try:
        AlgoSelectorWizard().ask_and_decide()
    except (KeyboardInterrupt, MemoryError):
        print("Some Error Occured - No Suggestion can be provided")

    farewell = "Thanks for using the Algoselector-Wizard, Hope our suggestion will be useful"
    print(farewell)
1058
if __name__ == "__main__":
    # Script entry point: register the SIGINT handler first so that an
    # interrupt during the interactive session goes through signal_handler,
    # then start the session.
    signal.signal(signal.SIGINT, signal_handler)
    main()