Tools: Update Model Selector
[thoth.git] / tools / modelselector / modelselector.py
1 # Copyright 2022 Linux Foundation.
2 # srao@linuxfoundation.org
3 #
4 # Licensed under the Apache License, Version 2.0 (the "License");
5 # you may not use this file except in compliance with the License.
6 # You may obtain a copy of the License at
7 #
8 #   http://www.apache.org/licenses/LICENSE-2.0
9 #
10 # Unless required by applicable law or agreed to in writing, software
11 # distributed under the License is distributed on an "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 # See the License for the specific language governing permissions and
14 # limitations under the License.
15
16 """
17 Tool to suggest which ML approach is most applicable for
18 a particular dataset and use case.
19 TODO:
20 1. Minimize code.
21 2. Add Informative data to the user.
22 3. Check for Size Entry - 1G/K ..
23 """
24
25 from __future__ import print_function
26 import signal
27 import sys
28 from pypsi import wizard as wiz
29 from pypsi.shell import Shell
30
31 # pylint: disable=line-too-long,too-few-public-methods,too-many-instance-attributes, too-many-nested-blocks, too-many-return-statements, too-many-branches
32
class Bcolors:
    """
    ANSI escape sequences used to style text printed on the terminal.
    """
    # Bright foreground colours.
    HEADER = '\x1b[95m'
    OKBLUE = '\x1b[94m'
    OKGREEN = '\x1b[92m'
    WARNING = '\x1b[93m'
    FAIL = '\x1b[91m'
    # Reset every attribute back to the terminal default.
    ENDC = '\x1b[0m'
    # Text attributes.
    BOLD = '\x1b[1m'
    UNDERLINE = '\x1b[4m'
45
46 class AlgoSelectorWizard():
47     """
48     Class to create wizards
49     """
50     def __init__(self):
51         """
52         Perform Initialization.
53         """
54         self.shell = Shell()
55         # Set of all values from the user
56         self.main_values = {}
57         self.main_l1_values = {}
58         self.main_l2a_values = {}
59         self.main_l2b_values = {}
60         self.main_l3_values = {}
61         self.main_l4_values = {}
62         self.unsup_values = {}
63         self.ri_values = {}
64         self.gen_values = {}
65         self.gen_choice_values = {}
66         self.gen_metrics_values = {}
67         self.gen_data_main_values = {}
68         self.gen_data_text_values = {}
69         self.gen_data_features_values = {}
70         self.gen_data_signal_values = {}
71         self.gen_about_data_basic_values = {}
72         self.gen_about_data_adv_values = {}
73         self.gen_about_data_output_values = {}
74         self.gans_values = {}
75         # Set of Wizards.
76         self.wiz_main = None
77         self.wiz_main_l1 = None
78         self.wiz_main_l2_a = None
79         self.wiz_main_l2_b = None
80         self.wiz_main_l3 = None
81         self.wiz_main_l4 = None
82         self.wiz_generic = None
83         self.wiz_generic_choice = None
84         self.wiz_geneirc_metric = None
85         self.wiz_generic_data_main = None
86         self.wiz_generic_data_signal = None
87         self.wiz_generic_data_features = None
88         self.wiz_generic_data_text = None
89         self.wiz_generic_data_basic = None
90         self.wiz_generic_data_adv = None
91         self.wiz_generic_data_output = None
92         self.wiz_unsupervised = None
93         self.wiz_reinforcement = None
94         self.wiz_gans = None
95         # Some Inferences
96         self.ml_needed = False
97         self.ml_gans = False
98         self.supervised = False
99         self.unsupervised = False
100         self.reinforcement = False
101         self.data_size = 'high'
102         self.interpretability = False
103         self.faster = False
104         self.ftod_ratio = 'low'
105         self.reproducibility = False
106
107
108     ############# All the Wizards ##################################
109
110     ### GENERIC Wizards - Need for ML ##############################
111     def main_wizard_l1(self):
112         """
113         The Main Wizard L1
114         """
115         self.wiz_main_l1 = wiz.PromptWizard(
116             name=Bcolors.OKBLUE+"Do you Need ML - Data Availability"+Bcolors.ENDC,
117             description="",
118             steps=(
119                 # The list of input prompts to ask the user.
120                 wiz.WizardStep(
121                     # ID where the value will be stored
122                     id="data_availability",
123                     # Display name
124                     name=Bcolors.HEADER+"Do you have access to data about different situations, or that describes a lot of examples of situations"+Bcolors.ENDC,
125                     # Help message
126                     help="Y/N - Yes/No",
127                     validators=(wiz.required_validator, wiz.boolean_validator),
128                     default='Y',
129                 ),
130             )
131         )
132     
133     def gans_wizard(self):
134         """
135         The GANs Wizard
136         """
137         self.wiz_gans = wiz.PromptWizard(
138             name=Bcolors.OKBLUE+"Synthetic Data Genration using GANs"+Bcolors.ENDC,
139             description="",
140             steps=(
141                 # The list of input prompts to ask the user.
142                 wiz.WizardStep(
143                     # ID where the value will be stored
144                     id="gans_data_type",
145                     # Display name
146                     name=Bcolors.HEADER+"Is the sample data you have is time-series? Answer Y/N - Yes/No"+Bcolors.ENDC,
147                     # Help message
148                     help="Y/N - Yes/No",
149                     validators=(wiz.required_validator, wiz.boolean_validator),
150                     default='Y',
151                 ),
152                 wiz.WizardStep(
153                     # ID where the value will be stored
154                     id="gans_data_variables",
155                     # Display name
156                     name=Bcolors.HEADER+"Is the sample data you have is multi-variate (more than one features/columns) ? Answer Y/N - Yes/No"+Bcolors.ENDC,
157                     # Help message
158                     help="Y/N - Yes/No",
159                     validators=(wiz.required_validator, wiz.boolean_validator),
160                     default='Y',
161                 ),
162             )
163         )
164
165
166     def main_wizard_l2_a(self):
167         """
168         The Main Wizard L2-A
169         """
170         self.wiz_main_l2_a = wiz.PromptWizard(
171             name=Bcolors.OKBLUE+"Do you Need ML - Data Creation"+Bcolors.ENDC,
172             description="",
173             steps=(
174                 # The list of input prompts to ask the user.
175                 wiz.WizardStep(
176                     # ID where the value will be stored
177                     id="data_creativity",
178                     # Display name
179                     name=Bcolors.HEADER+"Will a system be able to gather a lot of data by trying sequences of actions in many different situations and seeing the results"+Bcolors.ENDC,
180                     # Help message
181                     help="Y/N - Yes/No",
182                     validators=(wiz.required_validator, wiz.boolean_validator),
183                     default='Y',
184                 ),
185             )
186         )
187
188     def main_wizard_l2_b(self):
189         """
190         The Main Wizard L2-B
191         """
192         gan = """ Synthetic data generation is an important use-case for Telco-scenarios, due to difficulty in getting good dataset."""
193         label = """ One or more meaningful and informative 'tag' to provide context so that a machine learning model can learn from it. For example, labels might indicate whether a photo contains a bird or car, which words were uttered in an audio recording, or if an x-ray contains a tumor. Data labeling is required for a variety of use cases including computer vision, natural language processing, and speech recognition."""
194         self.wiz_main_l2_b = wiz.PromptWizard(
195             name=Bcolors.OKBLUE+"Do you Need ML - Data Programmability"+Bcolors.ENDC,
196             description="",
197             steps=(
198                 # The list of input prompts to ask the user.
199                 wiz.WizardStep(
200                     # ID where the value will be stored
201                     id="data_generation",
202                     # Display name
203                     name=Bcolors.HEADER+" Do you want to generate Synthetic Data from the existing data (Type Y/N - Yes/No). Type helfp for the description"+Bcolors.ENDC,
204                     # Help message
205                     help=gan,
206                     validators=(wiz.required_validator, wiz.boolean_validator),
207                     default='N',
208                 ),
209                 wiz.WizardStep(
210                     # ID where the value will be stored
211                     id="data_label",
212                     # Display name
213                     name=Bcolors.HEADER+" Do you have Labelled data? (Type Y/N - Yes/No). Type help for description of label. "+Bcolors.ENDC,
214                     # Help message
215                     help=label,
216                     validators=(wiz.required_validator, wiz.boolean_validator),
217                     default='Y',
218                 ),
219                 wiz.WizardStep(
220                     # ID where the value will be stored
221                     id="data_programmability",
222                     # Display name
223                     name=Bcolors.HEADER+"Can a program or set of rules decide what actions to take based on the data you have about the situations"+Bcolors.ENDC,
224                     # Help message
225                     help="Y/N - Yes/No",
226                     validators=(wiz.required_validator, wiz.boolean_validator),
227                     default='N',
228                 ),
229             )
230         )
231
232
233     def main_wizard_l3(self):
234         """
235         The Main Wizard L3
236         """
237         self.wiz_main_l3 = wiz.PromptWizard(
238             name=Bcolors.OKBLUE+"Do you Need ML - Data Knowledge"+Bcolors.ENDC,
239             description="",
240             steps=(
241                 # The list of input prompts to ask the user.
242                 wiz.WizardStep(
243                     # ID where the value will be stored
244                     id="data_knowledge",
245                     # Display name
246                     name=Bcolors.HEADER+"Could a knowledgeable human decide what actions to take based on the data you have about the situations"+Bcolors.ENDC,
247                     # Help message
248                     help="Y/N - Yes/No",
249                     validators=(wiz.required_validator, wiz.boolean_validator),
250                     default='Y',
251                 ),
252             )
253         )
254
255     def main_wizard_l4(self):
256         """
257         The Main Wizard - L4
258         """
259         self.wiz_main_l4 = wiz.PromptWizard(
260             name=Bcolors.OKBLUE+"Do you Need ML - Data Pattern"+Bcolors.ENDC,
261             description="",
262             steps=(
263                 # The list of input prompts to ask the user.
264                 wiz.WizardStep(
265                     # ID where the value will be stored
266                     id="data_pattern",
267                     # Display name
268                     name=Bcolors.HEADER+"Could there be patterns in these situations that the humans haven't recognized before"+Bcolors.ENDC,
269                     # Help message
270                     help="Y/N - Yes/No.",
271                     validators=(wiz.required_validator, wiz.boolean_validator),
272                     default='Y'
273                 ),
274             )
275         )
276     ### GENERIC Wizards - GOAL, METRICS, DATA ##############################
277     def gen_choice_wizard(self):
278         """
279         Generic Wizard - Goal, metrics, data
280         """
281         self.wiz_generic_choice = wiz.PromptWizard(
282             name=Bcolors.OKBLUE+"Understanding Goal, and Preferences"+Bcolors.ENDC,
283             description="",
284             steps=(
285                 # The list of input prompts to ask the user.
286                 wiz.WizardStep(
287                     # ID where the value will be stored
288                     id="data_goal",
289                     # Display name
290                     name=Bcolors.HEADER+" What is your goal with the data? Predict, Describe or Explore"+Bcolors.ENDC,
291                     # Help message
292                     help="Enter one of Predict/Describe/Explore",
293                     validators=(wiz.required_validator, wiz.choice_validator(['Predict',
294                                                                               'predict',
295                                                                               'Describe',
296                                                                               'describe',
297                                                                               'Explore',
298                                                                               'explore'])),
299                     default='Explore'
300                 ),
301                 wiz.WizardStep(
302                     # ID where the value will be stored
303                     id="data_metrics_pref",
304                     # Display name
305                     name=Bcolors.HEADER+" Do you know which metrics (speed, accuracy, etc.) are more important for you? "+Bcolors.ENDC,
306                     # Help message
307                     help="Y/N - Yes/No",
308                     validators=(wiz.required_validator, wiz.boolean_validator),
309                     default='Y'
310                 ),
311                 wiz.WizardStep(
312                     # ID where the value will be stored
313                     id="data_main",
314                     # Display name
315                     name=Bcolors.HEADER+" Do you know about the input data type (If its signal/features/text)  ?  "+Bcolors.ENDC,
316                     # Help message
317                     help="Y/N - Yes/No",
318                     validators=(wiz.required_validator, wiz.boolean_validator),
319                     default='Y'
320                 ),
321                 wiz.WizardStep(
322                     # ID where the value will be stored
323                     id="data_databasic_pref",
324                     # Display name
325                     name=Bcolors.HEADER+" Do you have basic information (size, count, etc.) about the input data? "+Bcolors.ENDC,
326                     # Help message
327                     help="Y/N - Yes/No",
328                     validators=(wiz.required_validator, wiz.boolean_validator),
329                     default='Y'
330                 ),
331                 wiz.WizardStep(
332                     # ID where the value will be stored
333                     id="data_dataadv_pref",
334                     # Display name
335                     name=Bcolors.HEADER+" Do you have advanced information (distribution, relation, independency, etc.) about the input data? "+Bcolors.ENDC,
336                     # Help message
337                     help="Y/N - Yes/No",
338                     validators=(wiz.required_validator, wiz.boolean_validator),
339                     default='Y'
340                 ),
341                 wiz.WizardStep(
342                     # ID where the value will be stored
343                     id="data_dataoutput_pref",
344                     # Display name
345                     name=Bcolors.HEADER+" Do you have basic information (size, count, etc.) about the output? "+Bcolors.ENDC,
346                     # Help message
347                     help="Y/N - Yes/No",
348                     validators=(wiz.required_validator, wiz.boolean_validator),
349                     default='Y'
350                 ),
351             )
352         )
353
354     def gen_metrics_wizard(self):
355         """
356         Generic Wizard - Goal, metrics, data
357         """
358         self.wiz_generic_metrics = wiz.PromptWizard(
359             name=Bcolors.OKBLUE+"Understanding Goal, Metrics, Data and Output Type"+Bcolors.ENDC,
360             description="",
361             steps=(
362                 # The list of input prompts to ask the user.
363                 wiz.WizardStep(
364                     # ID where the value will be stored
365                     id="metric_accuracy",
366                     # Display name
367                     name=Bcolors.HEADER+" How important the metric 'Accuracy' is for you? 1-5: 1- Least important 5- Most Important"+Bcolors.ENDC,
368                     # Help message
369                     help="Enter 1-5: 1 being least important, and 5 being most important",
370                     validators=(wiz.required_validator, wiz.int_validator(1, 5)),
371                     default='1'
372                 ),
373                 wiz.WizardStep(
374                     # ID where the value will be stored
375                     id="metric_speed",
376                     # Display name
377                     name=Bcolors.HEADER+" How important the metric 'Speed' is for you? 1-5: 1- Least important 5- Most Important"+Bcolors.ENDC,
378                     # Help message
379                     help="Enter 1-5: 1 being least important, and 5 being most important",
380                     validators=(wiz.required_validator, wiz.int_validator(1, 5)),
381                     default='1'
382                 ),
383                 wiz.WizardStep(
384                     # ID where the value will be stored
385                     id="metric_interpretability",
386                     # Display name
387                     name=Bcolors.HEADER+" How important the metric 'Interpretability' is for you? 1-5: 1- Least important 5- Most Important"+Bcolors.ENDC,
388                     # Help message
389                     help="Enter 1-5: 1 being least important, and 5 being most important",
390                     validators=(wiz.required_validator, wiz.int_validator(1, 5)),
391                     default='1'
392                 ),
393                 wiz.WizardStep(
394                     # ID where the value will be stored
395                     id="metric_reproducibility",
396                     # Display name
397                     name=Bcolors.HEADER+" How important the metric 'Reproducibility' is for you? 1-5: 1- Least important 5- Most Important"+Bcolors.ENDC,
398                     # Help message
399                     help="Enter 1-5: 1 being least important, and 5 being most important",
400                     validators=(wiz.required_validator, wiz.int_validator(1, 5)),
401                     default='1'
402                 ),
403                 wiz.WizardStep(
404                     # ID where the value will be stored
405                     id="metric_implementation",
406                     # Display name
407                     name=Bcolors.HEADER+" How important the metric 'Ease of Implementation and Maintenance' is for you? 1-5: 1- Least important 5- Most Important"+Bcolors.ENDC,
408                     # Help message
409                     help="Enter 1-5: 1 being least important, and 5 being most important",
410                     validators=(wiz.required_validator, wiz.int_validator(1, 5)),
411                     default='1'
412                 ),
413             )
414         )
415
416     def gen_data_main_wizard(self):
417         """
418         Generic Wizard - Goal, metrics, data
419         """
420         self.wiz_generic_data_main = wiz.PromptWizard(
421             name=Bcolors.OKBLUE+"Understanding Goal, and Preferences"+Bcolors.ENDC,
422             description="",
423             steps=(
424                 # The list of input prompts to ask the user.
425                 wiz.WizardStep(
426                     # ID where the value will be stored
427                     id="data_column",
428                     # Display name
429                     name=Bcolors.HEADER+" What does the data (columns) represent? Please type help and select the associated number"+Bcolors.ENDC,
430                     # Help message
431                     help="1. Well Defined Features\n 2. Signals - Timeseries, pixels, etc\n 3. Text - Unstructured\n 4. None of the above\n",
432                     validators=(wiz.required_validator, wiz.int_validator(1, 4)),
433                     default='1'
434                 ),
435             )
436         )
437
438     def gen_data_signal_wizard(self):
439         """
440         Generic Wizard - Goal, metrics, data
441         """
442         self.wiz_generic_data_signal = wiz.PromptWizard(
443             name=Bcolors.OKBLUE+"Understanding Goal, and Preferences"+Bcolors.ENDC,
444             description="",
445             steps=(
446                 # The list of input prompts to ask the user.
447                 wiz.WizardStep(
448                     # ID where the value will be stored
449                     id="data_signal_type",
450                     # Display name
451                     name=Bcolors.HEADER+" If Signals, can you choose any one from the below list? Please type help for list "+Bcolors.ENDC,
452                     # Help message
453                     help="1. Image\n 2. Audio\n 3. Timeseries\n 4. None of the above\n 5. Not Applicable\n  ",
454                     validators=(wiz.required_validator, wiz.int_validator(1, 5)),
455                     default='3'
456                 ),
457             )
458         )
459
460     def gen_data_features_wizard(self):
461         """
462         Generic Wizard - Goal, metrics, data
463         """
464         self.wiz_generic_data_features = wiz.PromptWizard(
465             name=Bcolors.OKBLUE+"Understanding Goal, and Preferences"+Bcolors.ENDC,
466             description="",
467             steps=(
468                 # The list of input prompts to ask the user.
469                 wiz.WizardStep(
470                     # ID where the value will be stored
471                     id="data_features",
472                     # Display name
473                     name=Bcolors.HEADER+" If features, are they well defined? i.e., are all the variables well understood? "+Bcolors.ENDC,
474                     # Help message
475                     help="Y/N",
476                     validators=(wiz.required_validator, wiz.boolean_validator),
477                     default='Y'
478                 ),
479                 wiz.WizardStep(
480                     # ID where the value will be stored
481                     id="data_features_count",
482                     # Display name
483                     name=Bcolors.HEADER+" If features, How many are there? "+Bcolors.ENDC,
484                     # Help message
485                     help="Number only - Approximate should be OK.",
486                     validators=(wiz.required_validator, wiz.int_validator(1, 100000)),
487                     default='10'
488                 ),
489             )
490         )
491
492     def gen_data_text_wizard(self):
493         """
494         Generic Wizard - Goal, metrics, data
495         """
496         self.wiz_generic_data_text = wiz.PromptWizard(
497             name=Bcolors.OKBLUE+"Understanding Goal, and Preferences"+Bcolors.ENDC,
498             description="",
499             steps=(
500                 # The list of input prompts to ask the user.
501                 wiz.WizardStep(
502                     # ID where the value will be stored
503                     id="data_text_type",
504                     # Display name
505                     name=Bcolors.HEADER+" If Text, can you choose any one from the below list? Please type help for list"+Bcolors.ENDC,
506                     # Help message
507                     help="1. Webpages\n 2. Emails\n 3. Social-Media Posts\n 4. Books\n 5. Formal Articles\n 6. Speech converted to text\n 7. None of the above\n 8. Not Applicable\n  ",
508                     validators=(wiz.required_validator, wiz.int_validator(1, 8)),
509                     default='3'
510                 ),
511
512             )
513         )
514
515     def gen_about_data_basic_wizard(self):
516         """
517         Generic Wizard - Goal, metrics, data
518         """
519         self.wiz_generic_data_basic = wiz.PromptWizard(
520             name=Bcolors.OKBLUE+"Understanding Basic Input Data Information"+Bcolors.ENDC,
521             description="",
522             steps=(
523                 # The list of input prompts to ask the user.
524                 wiz.WizardStep(
525                     # ID where the value will be stored
526                     id="data_missing",
527                     # Display name
528                     name=Bcolors.HEADER+" Are there any missing values in the data? "+Bcolors.ENDC,
529                     # Help message
530                     help="Y/N",
531                     validators=(wiz.required_validator, wiz.boolean_validator),
532                     default='N'
533                 ),
534                 wiz.WizardStep(
535                     # ID where the value will be stored
536                     id="data_size_bytes",
537                     # Display name
538                     name=Bcolors.HEADER+" How big is the data in terms of size? (Use K/M/G Bytes unit) "+Bcolors.ENDC,
539                     # Help message
540                     help="Number(integer) and unit: K for Kilo, M for Mega and G for Giga. Ex: 10G for 10 Giga bytes",
541                     validators=(wiz.required_validator),
542                     default='1G'
543                 ),
544                 wiz.WizardStep(
545                     # ID where the value will be stored
546                     id="data_size_samples",
547                     # Display name
548                     name=Bcolors.HEADER+" How big is the data in terms of samples? (Use T/M/B Samples) "+Bcolors.ENDC,
549                     # Help message
550                     help="Number(integer) and unit: T for Thousand, M for Million and B for Billion. Ex: 1M for 1 Million Samples",
551                     validators=(wiz.required_validator),
552                     default='1M'
553                 ),
554             )
555         )
556
557     def gen_about_data_advanced_wizard(self):
558         """
559         Generic Wizard - Goal, metrics, data
560         """
561         self.wiz_generic_data_adv = wiz.PromptWizard(
562             name=Bcolors.OKBLUE+"Understanding Advanced Input Data Information"+Bcolors.ENDC,
563             description="",
564             steps=(
565                 # The list of input prompts to ask the user.
566                 wiz.WizardStep(
567                     # ID where the value will be stored
568                     id="data_distribution",
569                     # Display name
570                     name=Bcolors.HEADER+" Are you aware of any 'Distribution' that is inherent to the data, we can take advantage of?"+Bcolors.ENDC,
571                     # Help message
572                     help="Y/N - Yes",
573                     validators=(wiz.required_validator, wiz.boolean_validator),
574                     default='Y'
575                 ),
576                 wiz.WizardStep(
577                     # ID where the value will be stored
578                     id="data_io_relation",
579                     # Display name
580                     name=Bcolors.HEADER+" Is the probability of 'Linear Relation' between input and the output is high?"+Bcolors.ENDC,
581                     # Help message
582                     help="Y/N - Yes/No",
583                     validators=(wiz.required_validator, wiz.boolean_validator),
584                     default='Y'
585                 ),
586                 wiz.WizardStep(
587                     # ID where the value will be stored
588                     id="data_correlation",
589                     # Display name
590                     name=Bcolors.HEADER+" Are you confident that there is NO high correlation among the independent variables in your day?"+Bcolors.ENDC,
591                     # Help message
592                     help="Y/N/ - Yes/No ",
593                     validators=(wiz.required_validator, wiz.boolean_validator),
594                     default='Y'
595                 ),
596                 wiz.WizardStep(
597                     # ID where the value will be stored
598                     id="data_cond_indep",
599                     # Display name
600                     name=Bcolors.HEADER+" Are you confident that the variables are conditionally independent?"+Bcolors.ENDC,
601                     # Help message
602                     help="Y/N/. If probability that it rains given lightining and thunder is same as probability that it rains given lightining, then rain and thunder are conditionally independent",
603                     validators=(wiz.required_validator, wiz.boolean_validator),
604                     default='Y'
605                 ),
606             )
607         )
608
609     def gen_about_output_wizard(self):
610         """
611         Generic Wizard - Goal, metrics, data
612         """
613         self.wiz_generic_data_output = wiz.PromptWizard(
614             name=Bcolors.OKBLUE+"Understanding Data Output"+Bcolors.ENDC,
615             description="",
616             steps=(
617                 # The list of input prompts to ask the user.        
618                 wiz.WizardStep(
619                     # ID where the value will be stored
620                     id="data_type_output",
621                     # Display name
622                     name=Bcolors.HEADER+" What is the expected output data type ? (Please type number associated with type in 'help') "+Bcolors.ENDC,
623                     # Help message
624                     help=" 1:Numerical-Discrete\n 2:Numerical-Continuous\n 3:Ordinal\n 4:Categorical-Binary\n 5:Categorical-Multiclass",
625                     validators=(wiz.required_validator, wiz.int_validator(1, 5)),
626                     default='1'
627                 ),
628                 wiz.WizardStep(
629                     # ID where the value will be stored
630                     id="data_output_prob",
631                     # Display name
632                     name=Bcolors.HEADER+" Is the expected output data a probability value ? "+Bcolors.ENDC,
633                     # Help message
634                     help="Y/N",
635                     validators=(wiz.required_validator, wiz.boolean_validator),
636                     default='N'
637                 ),
638             )
639         )
640
641
642     def unsupervised_wizard(self):
643         """
644         The Un-Supervized Learning Wizard
645         """
646         self.wiz_unsupervised = wiz.PromptWizard(
647             name=Bcolors.OKBLUE+"Understanding Goal, Metrics, Data and Output Type"+Bcolors.ENDC,
648             description="",
649             steps=(
650                 # The list of input prompts to ask the user.
651                 wiz.WizardStep(
652                     # ID where the value will be stored
653                     id="unsup_goal",
654                     # Display name
655                     name=Bcolors.HEADER+" What is the main goal? (Please type number associated with type in 'help')"+Bcolors.ENDC,
656                     # Help message
657                     help="1: Explore Similar Groups (clustering) \n 2: Perform Dimensionality Reduction\n 3: Others\n",
658                     validators=(wiz.required_validator, wiz.int_validator(1, 3)),
659                     default='1'
660                 ),
661                 wiz.WizardStep(
662                     # ID where the value will be stored
663                     id="unsup_dr_topic_mod",
664                     # Display name
665                     name=Bcolors.HEADER+" If dimensionality reduction, do you prefer topic modelling ? (Please type NA is you are not sure)"+Bcolors.ENDC,
666                     # Help message
667                     help="Y/N/NA",
668                     validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na',
669                                                                               'y','n','na','nA'])),
670                     default='NA'
671                 ),
672                 wiz.WizardStep(
673                     # ID where the value will be stored
674                     id="unsup_clus_dv",
675                     # Display name
676                     name=Bcolors.HEADER+" Are you aware of density variations in your data ? (Please type NA is you are not sure)"+Bcolors.ENDC,
677                     # Help message
678                     help="Y/N/NA",
679                     validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na',
680                                                                               'y','n','na','nA'])),
681                     default='NA'
682                 ),
683                 wiz.WizardStep(
684                     # ID where the value will be stored
685                     id="unsup_clus_outliers",
686                     # Display name
687                     name=Bcolors.HEADER+" Are there too many outliers in your data ? (Please type NA is you are not sure)"+Bcolors.ENDC,
688                     # Help message
689                     help="Y/N/NA",
690                     validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na',
691                                                                               'y','n','na','nA'])),
692                     default='NA'
693                 ),
694                 wiz.WizardStep(
695                     # ID where the value will be stored
696                     id="unsup_clus_groups",
697                     # Display name
698                     name=Bcolors.HEADER+" If clustering, do you know how many groups to form? (Please type NA is you are not sure)"+Bcolors.ENDC,
699                     # Help message
700                     help="Y/N/NA",
701                     validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na',
702                                                                               'y','n','na','nA'])),
703                     default='NA'
704                 ),
705
706             )
707         )
708
709     def reinforcement_wizard(self):
710         """
711         The Reinforced Learning Wizard
712         """
713         message = """
714             Reward  |--------|
715             |-------| Agent  |  Action
716             | |-----|        |-------|
717             | |     |--------|       |
718             | |state                 |
719             | |                      |
720             | |    |-----------|     |
721             | |----|Environment|     |
722             |------|           |-----|
723                    |-----------|
724             """
725         self.wiz_reinforcement = wiz.PromptWizard(
726             name=Bcolors.OKBLUE+"Reinforcement Specific"+Bcolors.ENDC,
727             description="",
728             steps=(
729                 # The list of input prompts to ask the user.
730                 wiz.WizardStep(
731                     # ID where the value will be stored
732                     id="ri_info",
733                     # Display name
734                     name=Bcolors.HEADER+" Type help for reference diagram for reinforcement-learning"+Bcolors.ENDC,
735                     # Help message
736                     help=message,
737                     validators=(wiz.required_validator),
738                     default='Type Help or Press Enter'
739                 ),
740                 wiz.WizardStep(
741                     # ID where the value will be stored
742                     id="ri_model_preference",
743                     # Display name
744                     name=Bcolors.HEADER+" Do you prefer model-based approach? (Type NA if you are not sure) "+Bcolors.ENDC,
745                     # Help message
746                     help="Y/N/NA",
747                     validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na',
748                                                                               'y','n','na','nA'])),
749                     default='Y'
750                 ),
751                 wiz.WizardStep(
752                     # ID where the value will be stored
753                     id="ri_model_availability",
754                     # Display name
755                     name=Bcolors.HEADER+" Do you have a model for model-based approach? (Type NA if not applicable) "+Bcolors.ENDC,
756                     # Help message
757                     help="Y/N/NA",
758                     validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na',
759                                                                               'y','n','na','nA'])),
760                     default='Y'
761                 ),
762                 wiz.WizardStep(
763                     # ID where the value will be stored
764                     id="ri_modelfree_value",
765                     # Display name
766                     name=Bcolors.HEADER+" In Model-Free approach, do you prefer value-based approach? (Type NA if not applicable) "+Bcolors.ENDC,
767                     # Help message
768                     help="Y/N/NA",
769                     validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na',
770                                                                               'y','n','na','nA'])),
771                     default='Y'
772                 ),
773                 wiz.WizardStep(
774                     # ID where the value will be stored
775                     id="ri_modelfree_value_state",
776                     # Display name
777                     name=Bcolors.HEADER+" In Model-Free Value-Based approach, do you prefer state-only model? (Type NA if not applicable) "+Bcolors.ENDC,
778                     # Help message
779                     help="Y/N/NA",
780                     validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na',
781                                                                               'y','n','na','nA'])),
782                     default='Y'
783                 ),
784                 wiz.WizardStep(
785                     # ID where the value will be stored
786                     id="ri_app_domain",
787                     # Display name
788                     name=Bcolors.HEADER+" What is the application domain ? (Please type number associated with type in 'help') "+Bcolors.ENDC,
789                     # Help message
790                     help=" 1:Computer Resource Mgmt.\n 2:Robotics\n 3:Traffic-Control\n 4:Reccommenders\n 5:Autonomous Vehicles\n 6:Games\n 7:Chemistry\n 8:Others\n",
791                     validators=(wiz.required_validator, wiz.int_validator(1, 8)),
792                     default='1'
793                 ),
794             )
795         )
796
797     ############### All the Run Operations ######################
798     def run_mainwiz(self):
799         """
800         Run the Main Wizard
801         """
802         self.main_wizard_l1()
803         self.main_l1_values = self.wiz_main_l1.run(self.shell)
804         if self.main_l1_values['data_availability']:
805             print("OK-1")
806             self.main_wizard_l2_b()
807             self.main_l2b_values = self.wiz_main_l2_b.run(self.shell)
808             if self.main_l2b_values['data_label']:
809                 self.supervised = True
810             else:
811                 self.unsupervised = True
812             if self.main_l2b_values['data_programmability']:
813                 print(Bcolors.FAIL+"ML is not required - Please consider alternate approaches\n"+Bcolors.ENDC)
814             elif self.main_l2b_values['data_generation']:
815                 print(Bcolors.OKGREEN+"Looks like you need ML, let's continue"+Bcolors.ENDC)
816                 self.ml_needed = True
817                 self.ml_gans = True
818             else:
819                 self.main_wizard_l3()
820                 self.main_l3_values = self.wiz_main_l3.run(self.shell)
821                 if self.main_l3_values['data_knowledge']:
822                     print(Bcolors.OKGREEN+"Looks like you need ML, let's continue"+Bcolors.ENDC)
823                     self.ml_needed = True
824                 else:
825                     self.main_wizard_l4()
826                     self.main_l4_values = self.wiz_main_l4.run(self.shell)
827                     if self.main_l4_values['data_pattern']:
828                         print(Bcolors.OKGREEN+"Looks like you need ML, let's continue"+Bcolors.ENDC)
829                         self.ml_needed = True
830                     else:
831                         print(Bcolors.FAIL+"ML is not required - Please consider alternate approaches\n"+Bcolors.ENDC)
832         else:
833             self.main_wizard_l2_a()
834             self.main_l2a_values = self.wiz_main_l2_a.run(self.shell)
835             if self.main_l2a_values['data_creativity']:
836                 print(Bcolors.OKGREEN+"Looks like you need ML, let's continue"+Bcolors.ENDC)
837                 self.ml_needed = True
838                 self.reinforcement = True
839             else:
840                 print(Bcolors.FAIL+"ML is not required - Please consider alternate approaches\n"+Bcolors.ENDC)
841     
842     def run_gans_wizard(self):
843         """
844         Run GANs wizard
845         """
846         self.gans_wizard()
847         self.gans_values = self.wiz_gans.run(self.shell)
848         if self.gans_values['gans_data_type']:
849             if self.gans_values['gans_data_variables']:
850                 print("GANs technique to consider: TTS-GAN")
851             else:
852                 print("GANs technique to consider: TimeGAN")
853         else:
854             print("Sorry. We need to discuss, please connect with Anuket Thoth Project <srao@linuxfoundation.org>")
855                 
856
857
858
859     def run_generic_wizard(self):
860         """
861         Run Generic Wizard
862         """
863         self.gen_choice_wizard()
864         self.gen_choice_values = self.wiz_generic_choice.run(self.shell)
865         if self.gen_choice_values['data_metrics_pref']:
866             self.gen_metrics_wizard()
867             self.gen_metrics_values = self.wiz_generic_metrics.run(self.shell)
868         if self.gen_choice_values['data_main']:
869             self.gen_data_main_wizard()
870             self.gen_data_main_values = self.wiz_generic_data_main.run(self.shell)
871             if int(self.gen_data_main_values['data_column']) == 3:
872                 self.gen_data_text_wizard()
873                 self.gen_data_text_values = self.wiz_generic_data_text.run(self.shell)
874             else:
875                 self.gen_data_text_values = {'data_text_type': '3'}
876             if int(self.gen_data_main_values['data_column']) == 1:
877                 self.gen_data_features_wizard()
878                 self.gen_data_features_values = self.wiz_generic_data_features.run(self.shell)
879             else:
880                 self.gen_data_features_values = {'data_features': 'Y',
881                                                  'data_features_count': '10'}
882             if int(self.gen_data_main_values['data_column']) == 2:
883                 self.gen_data_signal_wizard()
884                 self.gen_data_signal_values = self.wiz_generic_data_signal.run(self.shell)
885             else:
886                 self.gen_data_signal_values = {'data_signal_type': '1'}
887         else:
888             self.gen_data_main_values = {'data_column': '1'}
889             print("Unknown Data Type")
890         if self.gen_choice_values['data_databasic_pref']:
891             self.gen_about_data_basic_wizard()
892             self.gen_about_data_basic_values = self.wiz_generic_data_basic.run(self.shell)
893         else:
894             self.gen_about_data_basic_values = {'data_missing':'N',
895                                                 'data_size_bytes': '1G',
896                                                 'data_size_samples': '1M'}
897         if self.gen_choice_values['data_dataadv_pref']:
898             self.gen_about_data_advanced_wizard()
899             self.gen_about_data_adv_values = self.wiz_generic_data_adv.run(self.shell)
900         else:
901             self.gen_about_data_adv_values = {'data_distribution': 'N',
902                                               'data_io_relation': 'N',
903                                               'data_correlation': 'N',
904                                               'data_cond_indep': 'N'}
905         if self.gen_choice_values['data_dataoutput_pref']:
906             self.gen_about_output_wizard()
907             self.gen_about_data_output_values = self.wiz_generic_data_output.run(self.shell)
908         else:
909             self.gen_about_data_output_values = {'data_type_output': '1',
910                                                  'data_output_prob': 'N'}
911
912
913     def run_unsupervised_wizard(self):
914         """
915         Run UnSupervised Learning Wizard.
916         """
917         self.unsupervised_wizard()
918         self.unsup_values = self.wiz_unsupervised.run(self.shell)
919
920     def run_reinforcement_wizard(self):
921         """
922         Run Reinforced Learning Wizard
923         """
924         self.reinforcement_wizard()
925         self.ri_values = self.wiz_reinforcement.run(self.shell)
926
927     def decide_unsupervised(self):
928         """
929         Decide which Unsupervised-learning to use
930         """
931         repro = False
932         clus_prob = False
933         if int(self.unsup_values['unsup_goal']) == 1:
934             # Clustering
935             if 'high' in self.data_size:
936                 if not self.reproducibility:
937                     clus_prob = True
938                 else:
939                     repro = True
940             else:
941                 if 'y' in self.unsup_values['unsup_clus_dv'].lower():
942                     if 'y' in self.unsup_values['unsup_clus_groups'].lower():
943                         clus_prob = True
944                     else:
945                         print("Unsupervised Learning model to consider: Hierarchical Clustering")
946                         return
947                 else:
948                     repro = True
949             if repro:
950                 if 'y' in self.unsup_values['unsup_clus_outliers'].lower():
951                     print("Unsupervised Learning model to consider: Hierarchical Clustering")
952                 else:
953                     print("Unsupervised Learning model to consider: DBSCAN")
954                 return
955             if clus_prob:
956                 if 'y' in self.gen_about_data_output_values['data_output_prob'].lower():
957                     print("Unsupervised Learning model to consider: Gaussian Mixture")
958                 else:
959                     print("Unsupervised Learning model to consider: KMeans")
960                 return
961         elif int(self.unsup_values['unsup_goal']) == 2:
962             # Dimensionality Reduction
963             if 'y' in self.unsup_values['unsup_dr_topic_mod'].lower():
964                 if 'y' in self.gen_about_data_output_values['data_output_prob'].lower():
965                     print("Unsupervised Learning model to consider: SVD")
966                 else:
967                     print("Unsupervised Learning model to consider: LDA")
968             else:
969                 print("Unsupervised Learning model to consider: PCA")
970         else:
971             print("Sorry. We need to discuss, please connect with Anuket Thoth Project <srao@linuxfoundation.org>")
972
973     def decide_reinforcement(self):
974         """
975         Decide which reinforement learning to use.
976         """
977         if (int(self.gen_about_data_output_values['data_type_output']) == 2 or
978                 'y' in self.ri_values['ri_model_preference'].lower()):
979             # Model Bsaed
980             if 'y' in self.ri_values['ri_model_availability'].lower():
981                 print("Reinforcement Learning model to consider - AlphaZero")
982             else:
983                 print("Reinforcement Learning models to consider - World Models, I2A, MBMF, and MBVE")
984         elif 'n' in self.ri_values['ri_model_preference'].lower():
985             # Model-Free based approach.
986             if 'y' not in self.ri_values['ri_modelfree_value'].lower():
987                 print("Reinforcement Learning models to consider: Policy Gradient and Actor Critic")
988             else:
989                 if 'y' in self.ri_values['ri_modelfree_value_state'].lower():
990                     print("Reinforcement Learning models to consider - Monte Carlo, TD(0), and TD(Lambda)")
991                 else:
992                     print("Reinforcement Learning models to consider - SARSA, QLearning, Deep Queue Nets")
993         else:
994             # Default
995             print("Sorry. We need to discuss, please connect with Anuket Thoth Project <srao@linuxfoundation.org>")
996
997     def perform_inference(self):
998         """
999         Perform Inferences. Used across all 3 types.
1000         """
1001         # Decide whether data is Low or High
1002         self.data_size = 'unknown'
1003         if ('k' in self.gen_about_data_basic_values['data_size_bytes'].lower() or
1004                 't' in self.gen_about_data_basic_values['data_size_samples']):
1005             self.data_size = 'low'
1006
1007         if int(self.gen_metrics_values['metric_interpretability']) >= 3 :
1008             self.interpretability = True
1009         if int(self.gen_metrics_values['metric_speed']) >= 3 :
1010             self.faster = True
1011         if int(self.gen_metrics_values['metric_reproducibility']) >= 3 :
1012             self.reproducibility = True
1013
1014         # Decide Features relative to Data (ftod_ratio) - high/low
1015         if ('k' in self.gen_about_data_basic_values['data_size_bytes'].lower() or
1016                 't' in self.gen_about_data_basic_values['data_size_samples']):
1017             if int(self.gen_data_features_values['data_features_count']) > 50:
1018                 self.ftod_ratio = 'high'
1019         elif ('m' in self.gen_about_data_basic_values['data_size_bytes'].lower() or
1020                 'm' in self.gen_about_data_basic_values['data_size_samples']):
1021             if int(self.gen_data_features_values['data_features_count']) > 5000:
1022                 self.ftod_ratio = 'high'
1023         else:
1024             if int(self.gen_data_features_values['data_features_count']) > 500000:
1025                 self.ftod_ratio = 'high'
1026
1027
1028     def decide_supervised(self):
1029         """
1030         Decide which Supervised learning to use.
1031         """
1032         if 'high' in self.data_size:
1033             # Cover: DT, RF, RNN, CNN, ANN and Naive Bayes
1034             if self.interpretability:
1035                 if self.faster:
1036                     print("Supervised Learning model to consider  - Decision Tree")
1037                 else:
1038                     print("Supervised Learning model to consider  - Random Forest")
1039             else:
1040                 if int(self.gen_data_main_values['data_column']) == 3:
1041                     print("Supervised Learning model to consider  - RNN")
1042                 elif (int(self.gen_data_main_values['data_column']) == 2 and
1043                         int(self.gen_data_signal_values['data_signal_type']) == 1):
1044                     print("Supervised Learning model to consider  - CNN")
1045                 elif (int(self.gen_data_main_values['data_column']) == 2 and
1046                         (int(self.gen_data_signal_values['data_signal_type']) == 2 or
1047                             int(self.gen_data_signal_values['data_signal_type']) == 3)):
1048                     if 'y' in self.gen_about_data_output_values['data_output_prob'].lower():
1049                         print("Supervised Learning model to consider  - Naive Bayes")
1050                     else:
1051                         print("Supervised Learning model to consider  - ANN")
1052                 else:
1053                     print("Supervised model to consider  Learning - ANN")
1054         elif 'low' in self.data_size:
1055             from_b = False
1056             # Cover: Regressions
1057             if 'high' in self.ftod_ratio:
1058                 from_b = True
1059             else:
1060                 print("Supervised Learning model to consider  - SVN with Gaussian Kernel")
1061                 return
1062             if int(self.gen_about_data_output_values['data_type_output']) != 2:
1063                 from_b = True
1064             else:
1065                 if 'y' in self.gen_about_data_adv_values['data_io_relation'].lower():
1066                     print("Supervised Learning model to consider  - Linear Regression or Linear SVM")
1067                 else:
1068                     print("Supervised Learning model to consider  - Polynomial Regression or nonLinear SVM")
1069                 return
1070             if from_b:
1071                 if int(self.gen_about_data_output_values['data_output_type']) == 4:
1072                     if 'y' in self.gen_about_data_output_values['data_output_prob'].lower():
1073                         if 'y' in self.gen_about_data_adv_values['data_cond_indep'].lower():
1074                             print("Supervised Learning model to consider  - Naive Bayes")
1075                         else:
1076                             if 'y' in self.gen_about_data_adv_values['data_correlation'].lower():
1077                                 print("Supervised Learning model to consider  - LASSO or Ridge Regression")
1078                             else:
1079                                 print("Supervised Learning model to consider  - Logistic Regression")
1080                     else:
1081                         print("Supervised Learning model to consider  - Polynomial Regression or nonLinear SVM")
1082
1083                 else:
1084                     print("Supervised Learning model to consider - KNN")
1085         else:
1086             # Default
1087             print("Sorry. We need to discuss, please connect with Anuket Thoth Project <srao@linuxfoundation.org>")
1088
1089     def ask_and_decide(self):
1090         """
1091         THe Main Engine
1092         """
1093         self.run_mainwiz()
1094         if self.ml_gans:
1095             self.run_gans_wizard()
1096             return
1097         if self.ml_needed:
1098             self.run_generic_wizard()
1099             if self.supervised:
1100                 self.decide_supervised()
1101             elif self.unsupervised:
1102                 self.run_unsupervised_wizard()
1103                 self.decide_unsupervised()
1104             elif self.reinforcement:
1105                 self.run_reinforcement_wizard()
1106                 self.decide_reinforcement()
1107
1108
def signal_handler(signum, frame):
    """
    Signal Handler: tell the user that no suggestion will be given, print
    the received signal number and frame, then exit with status 0.
    """
    notice = "\n You interrupted, No Suggestion will be provided!"
    print(notice)
    print(signum, frame)
    # Equivalent to sys.exit(0), which raises SystemExit itself.
    raise SystemExit(0)
1116
def main():
    """
    The Main Function: create the wizard, let it ask and decide, and print
    a closing message. KeyboardInterrupt and MemoryError are reported
    instead of propagating.
    """
    try:
        AlgoSelectorWizard().ask_and_decide()
    except (KeyboardInterrupt, MemoryError):
        print("Some Error Occured - No Suggestion can be provided")

    farewell = ("Thanks for using the Algoselector-Wizard, "
                "Hope our suggestion will be useful")
    print(farewell)
1129
if __name__ == "__main__":
    # Register the SIGINT (Ctrl-C) handler before starting, so an interrupt
    # during the prompts is routed to signal_handler.
    signal.signal(signal.SIGINT, signal_handler)
    main()