90b289c42a8a5646eb3238ccee4ebe406f554648
[thoth.git] / tools / modelselector / modelselector.py
1 # Copyright 2021 Spirent Communications.
2 # sridhar.rao@spirent.com
3 #
4 # Licensed under the Apache License, Version 2.0 (the "License");
5 # you may not use this file except in compliance with the License.
6 # You may obtain a copy of the License at
7 #
8 #   http://www.apache.org/licenses/LICENSE-2.0
9 #
10 # Unless required by applicable law or agreed to in writing, software
11 # distributed under the License is distributed on an "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 # See the License for the specific language governing permissions and
14 # limitations under the License.
15
16 """
17 Tool to suggest which ML approach is more applicable for
18 a particular data and usecase.
19 TODO:
20 1. Minimize code.
21 a. Reduce returns.
22 b. Optimize loops.
23
24 2. Add Informative data to the user.
25 """
26
27 from __future__ import print_function
28 import signal
29 import sys
30 from pypsi import wizard as wiz
31 from pypsi.shell import Shell
32
33 # pylint: disable=line-too-long,too-few-public-methods,too-many-instance-attributes, too-many-nested-blocks, too-many-return-statements, too-many-branches
34
35 class Bcolors:
36     """
37     For Coloring
38     """
39     HEADER = '\033[95m'
40     OKBLUE = '\033[94m'
41     OKGREEN = '\033[92m'
42     WARNING = '\033[93m'
43     FAIL = '\033[91m'
44     ENDC = '\033[0m'
45     BOLD = '\033[1m'
46     UNDERLINE = '\033[4m'
47
48 class AlgoSelectorWizard():
49     """
50     Class to create wizards
51     """
52     def __init__(self):
53         """
54         Perform Initialization.
55         """
56         self.shell = Shell()
57         self.main_values = {}
58         self.main_l1_values = {}
59         self.main_l2a_values = {}
60         self.main_l2b_values = {}
61         self.main_l3_values = {}
62         self.main_l4_values = {}
63         self.unsup_values = {}
64         self.ri_values = {}
65         self.gen_values = {}
66         self.wiz_main = None
67         self.wiz_main_l1 = None
68         self.wiz_main_l2_a = None
69         self.wiz_main_l2_b = None
70         self.wiz_main_l3 = None
71         self.wiz_main_l4 = None
72         self.wiz_generic = None
73         self.wiz_unsupervised = None
74         self.wiz_reinforcement = None
75         self.ml_needed = False
76         self.supervised = False
77         self.unsupervised = False
78         self.reinforcement = False
79         self.data_size = 'high'
80         self.interpretability = False
81         self.faster = False
82         self.ftod_ratio = 'low'
83         self.reproducibility = False
84
85
86     ############# All the Wizards ##################################
87
88     ### GENERIC Wizards - Need for ML ##############################
89     def main_wizard_l1(self):
90         """
91         The Main Wizard L1
92         """
93         self.wiz_main_l1 = wiz.PromptWizard(
94             name=Bcolors.OKBLUE+"Do you Need ML - Data Availability"+Bcolors.ENDC,
95             description="",
96             steps=(
97                 # The list of input prompts to ask the user.
98                 wiz.WizardStep(
99                     # ID where the value will be stored
100                     id="data_availability",
101                     # Display name
102                     name=Bcolors.HEADER+"Do you have access to data about different situations, or that describes a lot of examples of situations"+Bcolors.ENDC,
103                     # Help message
104                     help="Y/N/U - Yes/No/Unknown",
105                     validators=(wiz.required_validator),
106                     default='Y',
107                 ),
108             )
109         )
110
111     def main_wizard_l2_a(self):
112         """
113         The Main Wizard L2-A
114         """
115         self.wiz_main_l2_a = wiz.PromptWizard(
116             name=Bcolors.OKBLUE+"Do you Need ML - Data Creation"+Bcolors.ENDC,
117             description="",
118             steps=(
119                 # The list of input prompts to ask the user.
120                 wiz.WizardStep(
121                     # ID where the value will be stored
122                     id="data_creativity",
123                     # Display name
124                     name=Bcolors.HEADER+"Will a system be able to gather a lot of data by trying sequences of actions in many different situations and seeing the results"+Bcolors.ENDC,
125                     # Help message
126                     help="Y/N/U - Yes/No/Unknown",
127                     validators=(wiz.required_validator),
128                     default='Y',
129                 ),
130             )
131         )
132
133     def main_wizard_l2_b(self):
134         """
135         The Main Wizard L2-B
136         """
137         label = """ One or more meaningful and informative 'tag' to provide context so that a machine learning model can learn from it. For example, labels might indicate whether a photo contains a bird or car, which words were uttered in an audio recording, or if an x-ray contains a tumor. Data labeling is required for a variety of use cases including computer vision, natural language processing, and speech recognition."""
138         self.wiz_main_l2_b = wiz.PromptWizard(
139             name=Bcolors.OKBLUE+"Do you Need ML - Data Programmability"+Bcolors.ENDC,
140             description="",
141             steps=(
142                 # The list of input prompts to ask the user.
143                 wiz.WizardStep(
144                     # ID where the value will be stored
145                     id="data_label",
146                     # Display name
147                     name=Bcolors.HEADER+" Do you have Labelled data? (Type Y/N/U - Yes/No/Unknown). Type help for description of label. "+Bcolors.ENDC,
148                     # Help message
149                     help=label,
150                     validators=(wiz.required_validator),
151                     default='Y',
152                 ),
153                 wiz.WizardStep(
154                     # ID where the value will be stored
155                     id="data_programmability",
156                     # Display name
157                     name=Bcolors.HEADER+"Can a program or set of rules decide what actions to take based on the data you have about the situations"+Bcolors.ENDC,
158                     # Help message
159                     help="Y/N/U - Yes/No/Unknown",
160                     validators=(wiz.required_validator),
161                     default='Y',
162                 ),
163             )
164         )
165
166
167     def main_wizard_l3(self):
168         """
169         The Main Wizard L3
170         """
171         self.wiz_main_l3 = wiz.PromptWizard(
172             name=Bcolors.OKBLUE+"Do you Need ML - Data Knowledge"+Bcolors.ENDC,
173             description="",
174             steps=(
175                 # The list of input prompts to ask the user.
176                 wiz.WizardStep(
177                     # ID where the value will be stored
178                     id="data_knowledge",
179                     # Display name
180                     name=Bcolors.HEADER+"Could a knowledgeable human decide what actions to take based on the data you have about the situations"+Bcolors.ENDC,
181                     # Help message
182                     help="Y/N/U - Yes/No/Unknown",
183                     validators=(wiz.required_validator),
184                     default='Y',
185                 ),
186             )
187         )
188
189     def main_wizard_l4(self):
190         """
191         The Main Wizard - L4
192         """
193         self.wiz_main_l4 = wiz.PromptWizard(
194             name=Bcolors.OKBLUE+"Do you Need ML - Data Pattern"+Bcolors.ENDC,
195             description="",
196             steps=(
197                 # The list of input prompts to ask the user.
198                 wiz.WizardStep(
199                     # ID where the value will be stored
200                     id="data_pattern",
201                     # Display name
202                     name=Bcolors.HEADER+"Could there be patterns in these situations that the humans haven't recognized before"+Bcolors.ENDC,
203                     # Help message
204                     help="Y/N/U - Yes/No/Unknown",
205                     validators=(wiz.required_validator),
206                     default='Y'
207                 ),
208             )
209         )
210     ### GENERIC Wizards - GOAL, METRICS, DATA ##############################
211     def gen_wizard(self):
212         """
213         Generic Wizard - Goal, metrics, data
214         """
215         self.wiz_generic = wiz.PromptWizard(
216             name=Bcolors.OKBLUE+"Understanding Goal, Metrics, Data and Output Type"+Bcolors.ENDC,
217             description="",
218             steps=(
219                 # The list of input prompts to ask the user.
220                 wiz.WizardStep(
221                     # ID where the value will be stored
222                     id="data_goal",
223                     # Display name
224                     name=Bcolors.HEADER+" What is your goal with the data? Predict, Describe or Explore"+Bcolors.ENDC,
225                     # Help message
226                     help="Enter one of Predict/Describe/Explore",
227                     validators=(wiz.required_validator),
228                     default='Explore'
229                 ),
230                 wiz.WizardStep(
231                     # ID where the value will be stored
232                     id="metric_accuracy",
233                     # Display name
234                     name=Bcolors.HEADER+" How important the metric 'Accuracy' is for you? 1-5: 1- Least important 5- Most Important"+Bcolors.ENDC,
235                     # Help message
236                     help="Enter 1-5: 1 being least important, and 5 being most important",
237                     validators=(wiz.required_validator),
238                     default='1'
239                 ),
240                 wiz.WizardStep(
241                     # ID where the value will be stored
242                     id="metric_speed",
243                     # Display name
244                     name=Bcolors.HEADER+" How important the metric 'Speed' is for you? 1-5: 1- Least important 5- Most Important"+Bcolors.ENDC,
245                     # Help message
246                     help="Enter 1-5: 1 being least important, and 5 being most important",
247                     validators=(wiz.required_validator),
248                     default='1'
249                 ),
250                 wiz.WizardStep(
251                     # ID where the value will be stored
252                     id="metric_interpretability",
253                     # Display name
254                     name=Bcolors.HEADER+" How important the metric 'Interpretability' is for you? 1-5: 1- Least important 5- Most Important"+Bcolors.ENDC,
255                     # Help message
256                     help="Enter 1-5: 1 being least important, and 5 being most important",
257                     validators=(wiz.required_validator),
258                     default='1'
259                 ),
260                 wiz.WizardStep(
261                     # ID where the value will be stored
262                     id="metric_reproducibility",
263                     # Display name
264                     name=Bcolors.HEADER+" How important the metric 'Reproducibility' is for you? 1-5: 1- Least important 5- Most Important"+Bcolors.ENDC,
265                     # Help message
266                     help="Enter 1-5: 1 being least important, and 5 being most important",
267                     validators=(wiz.required_validator),
268                     default='1'
269                 ),
270                 wiz.WizardStep(
271                     # ID where the value will be stored
272                     id="metric_implementation",
273                     # Display name
274                     name=Bcolors.HEADER+" How important the metric 'Ease of Implementation and Maintenance' is for you? 1-5: 1- Least important 5- Most Important"+Bcolors.ENDC,
275                     # Help message
276                     help="Enter 1-5: 1 being least important, and 5 being most important",
277                     validators=(wiz.required_validator),
278                     default='1'
279                 ),
280                 wiz.WizardStep(
281                     # ID where the value will be stored
282                     id="data_column",
283                     # Display name
284                     name=Bcolors.HEADER+" What does the data (columns) represent? well defined 'Features', 'signals' (Timeseries, pixels, etc) or Text - (Please type the associated number)"+Bcolors.ENDC,
285                     # Help message
286                     help="1. Well Defined Features\n 2. Signals\n 3. Text - Unstructured\n 4. None of the above\n",
287                     validators=(wiz.required_validator),
288                     default='Features'
289                 ),
290                 wiz.WizardStep(
291                     # ID where the value will be stored
292                     id="data_signal_type",
293                     # Display name
294                     name=Bcolors.HEADER+" If Signals, can you choose any one from the below list? "+Bcolors.ENDC,
295                     # Help message
296                     help="1. Image\n 2. Audio\n 3. Timeseries\n 4. None of the above\n 5. Not Applicable\n  ",
297                     validators=(wiz.required_validator),
298                     default='3'
299                 ),
300                 wiz.WizardStep(
301                     # ID where the value will be stored
302                     id="data_text_type",
303                     # Display name
304                     name=Bcolors.HEADER+" If Text, can you choose any one from the below list? "+Bcolors.ENDC,
305                     # Help message
306                     help="1. Webpages\n 2. Emails\n 3. Social-Media Posts\n 4. Books\n 5. Formal Articles\n 6. Speech converted to text\n 7. None of the above\n 8. Not Applicable\n  ",
307                     validators=(wiz.required_validator),
308                     default='3'
309                 ),
310                 wiz.WizardStep(
311                     # ID where the value will be stored
312                     id="data_features",
313                     # Display name
314                     name=Bcolors.HEADER+" If features, are they well defined? i.e., are all the variables well understood? "+Bcolors.ENDC,
315                     # Help message
316                     help="Y/N/NA",
317                     validators=(wiz.required_validator),
318                     default='Y'
319                 ),
320                 wiz.WizardStep(
321                     # ID where the value will be stored
322                     id="data_features_count",
323                     # Display name
324                     name=Bcolors.HEADER+" If features, How many are there? "+Bcolors.ENDC,
325                     # Help message
326                     help="Number or NA",
327                     validators=(wiz.required_validator),
328                     default='10'
329                 ),
330                 wiz.WizardStep(
331                     # ID where the value will be stored
332                     id="data_distribution",
333                     # Display name
334                     name=Bcolors.HEADER+" Are you aware of any 'Distribution' that is inherent to the data, we can take advantage of?"+Bcolors.ENDC,
335                     # Help message
336                     help="Y/N/U",
337                     validators=(wiz.required_validator),
338                     default='Y'
339                 ),
340                 wiz.WizardStep(
341                     # ID where the value will be stored
342                     id="data_io_relation",
343                     # Display name
344                     name=Bcolors.HEADER+" Is the probability of 'Linear Relation' between input and the output is high?"+Bcolors.ENDC,
345                     # Help message
346                     help="Y/N/U",
347                     validators=(wiz.required_validator),
348                     default='Y'
349                 ),
350                 wiz.WizardStep(
351                     # ID where the value will be stored
352                     id="data_correlation",
353                     # Display name
354                     name=Bcolors.HEADER+" Are you confident that there is NO high correlation among the independent variables in your day?"+Bcolors.ENDC,
355                     # Help message
356                     help="Y/N/U. Change in one  ",
357                     validators=(wiz.required_validator),
358                     default='Y'
359                 ),
360                 wiz.WizardStep(
361                     # ID where the value will be stored
362                     id="data_cond_indep",
363                     # Display name
364                     name=Bcolors.HEADER+" Are you confident that the variables are conditionally independent?"+Bcolors.ENDC,
365                     # Help message
366                     help="Y/N/U. If probability that it rains given lightining and thunder is same as probability that it rains given lightining, then rain and thunder are conditionally independent",
367                     validators=(wiz.required_validator),
368                     default='Y'
369                 ),
370                 wiz.WizardStep(
371                     # ID where the value will be stored
372                     id="data_missing",
373                     # Display name
374                     name=Bcolors.HEADER+" Are there any missing values in the data? "+Bcolors.ENDC,
375                     # Help message
376                     help="Y/N/U",
377                     validators=(wiz.required_validator),
378                     default='N'
379                 ),
380                 wiz.WizardStep(
381                     # ID where the value will be stored
382                     id="data_size_bytes",
383                     # Display name
384                     name=Bcolors.HEADER+" How big is the data in terms of size? (Use K/M/G Bytes unit) "+Bcolors.ENDC,
385                     # Help message
386                     help="Number(integer) and unit: K for Kilo, M for Mega and G for Giga. Ex: 10G for 10 Giga bytes",
387                     validators=(wiz.required_validator),
388                     default='1G'
389                 ),
390                 wiz.WizardStep(
391                     # ID where the value will be stored
392                     id="data_size_samples",
393                     # Display name
394                     name=Bcolors.HEADER+" How big is the data in terms of samples? (Use T/M/B Samples) "+Bcolors.ENDC,
395                     # Help message
396                     help="Number(integer) and unit: T for Thousand, M for Million and B for Billion. Ex: 1M for 1 Million Samples",
397                     validators=(wiz.required_validator),
398                     default='1M'
399                 ),
400                 wiz.WizardStep(
401                     # ID where the value will be stored
402                     id="data_type_output",
403                     # Display name
404                     name=Bcolors.HEADER+" What is the expected output data type ? (Please type number associated with type in 'help') "+Bcolors.ENDC,
405                     # Help message
406                     help=" 1:Numerical-Discrete\n 2:Numerical-Continuous\n 3:Ordinal\n 4:Categorical-Binary\n 5:Categorical-Multiclass",
407                     validators=(wiz.required_validator),
408                     default='1'
409                 ),
410                 wiz.WizardStep(
411                     # ID where the value will be stored
412                     id="data_output_prob",
413                     # Display name
414                     name=Bcolors.HEADER+" Is the expected output data a probability value ? "+Bcolors.ENDC,
415                     # Help message
416                     help="Y/N",
417                     validators=(wiz.required_validator),
418                     default='N'
419                 ),
420             )
421         )
422
423
424     def unsupervised_wizard(self):
425         """
426         The Un-Supervized Learning Wizard
427         """
428         self.wiz_generic = wiz.PromptWizard(
429             name=Bcolors.OKBLUE+"Understanding Goal, Metrics, Data and Output Type"+Bcolors.ENDC,
430             description="",
431             steps=(
432                 # The list of input prompts to ask the user.
433                 wiz.WizardStep(
434                     # ID where the value will be stored
435                     id="unsup_goal",
436                     # Display name
437                     name=Bcolors.HEADER+" What is the main goal? (Please type number associated with type in 'help')"+Bcolors.ENDC,
438                     # Help message
439                     help="1: Explore Similar Groups (clustering) \n 2: Perform Dimensionality Reduction\n 3: Others\n",
440                     validators=(wiz.required_validator),
441                     default='1'
442                 ),
443                 wiz.WizardStep(
444                     # ID where the value will be stored
445                     id="unsup_dr_topic_mod",
446                     # Display name
447                     name=Bcolors.HEADER+" If dimensionality reduction, do you prefer topic modelling ? (Please type NA is you are not sure)"+Bcolors.ENDC,
448                     # Help message
449                     help="Y/N/NA",
450                     validators=(wiz.required_validator),
451                     default='NA'
452                 ),
453                 wiz.WizardStep(
454                     # ID where the value will be stored
455                     id="unsup_clus_dv",
456                     # Display name
457                     name=Bcolors.HEADER+" Are you aware of density variations in your data ? (Please type NA is you are not sure)"+Bcolors.ENDC,
458                     # Help message
459                     help="Y/N/NA",
460                     validators=(wiz.required_validator),
461                     default='NA'
462                 ),
463                 wiz.WizardStep(
464                     # ID where the value will be stored
465                     id="unsup_clus_outliers",
466                     # Display name
467                     name=Bcolors.HEADER+" Are there too many outliers in your data ? (Please type NA is you are not sure)"+Bcolors.ENDC,
468                     # Help message
469                     help="Y/N/NA",
470                     validators=(wiz.required_validator),
471                     default='NA'
472                 ),
473                 wiz.WizardStep(
474                     # ID where the value will be stored
475                     id="unsup_clus_groups",
476                     # Display name
477                     name=Bcolors.HEADER+" If clustering, do you know how many groups to form? (Please type NA is you are not sure)"+Bcolors.ENDC,
478                     # Help message
479                     help="Y/N/NA",
480                     validators=(wiz.required_validator),
481                     default='NA'
482                 ),
483
484             )
485         )
486
487     def reinforcement_wizard(self):
488         """
489         The Reinforced Learning Wizard
490         """
491         message = """
492             Reward  |--------|
493             |-------| Agent  |  Action
494             | |-----|        |-------|
495             | |     |--------|       |
496             | |state                 |
497             | |                      |
498             | |    |-----------|     |
499             | |----|Environment|     |
500             |------|           |-----|
501                    |-----------|
502             """
503         self.wiz_reinforcement = wiz.PromptWizard(
504             name=Bcolors.OKBLUE+"Reinforcement Specific"+Bcolors.ENDC,
505             description="",
506             steps=(
507                 # The list of input prompts to ask the user.
508                 wiz.WizardStep(
509                     # ID where the value will be stored
510                     id="ri_info",
511                     # Display name
512                     name=Bcolors.HEADER+" Type help for reference diagram for reinforcement-learning"+Bcolors.ENDC,
513                     # Help message
514                     help=message,
515                     validators=(wiz.required_validator),
516                     default='Type Help or Press Enter'
517                 ),
518                 wiz.WizardStep(
519                     # ID where the value will be stored
520                     id="ri_model_preference",
521                     # Display name
522                     name=Bcolors.HEADER+" Do you prefer model-based approach? (Type NA if you are not sure) "+Bcolors.ENDC,
523                     # Help message
524                     help="Y/N/NA",
525                     validators=(wiz.required_validator),
526                     default='Y'
527                 ),
528                 wiz.WizardStep(
529                     # ID where the value will be stored
530                     id="ri_model_availability",
531                     # Display name
532                     name=Bcolors.HEADER+" Do you have a model for model-based approach? (Type NA if not applicable) "+Bcolors.ENDC,
533                     # Help message
534                     help="Y/N/NA",
535                     validators=(wiz.required_validator),
536                     default='Y'
537                 ),
538                 wiz.WizardStep(
539                     # ID where the value will be stored
540                     id="ri_modelfree_value",
541                     # Display name
542                     name=Bcolors.HEADER+" In Model-Free approach, do you prefer value-based approach? (Type NA if not applicable) "+Bcolors.ENDC,
543                     # Help message
544                     help="Y/N/NA",
545                     validators=(wiz.required_validator),
546                     default='Y'
547                 ),
548                 wiz.WizardStep(
549                     # ID where the value will be stored
550                     id="ri_modelfree_value_state",
551                     # Display name
552                     name=Bcolors.HEADER+" In Model-Free Value-Based approach, do you prefer state-only model? (Type NA if not applicable) "+Bcolors.ENDC,
553                     # Help message
554                     help="Y/N/NA",
555                     validators=(wiz.required_validator),
556                     default='Y'
557                 ),
558                 wiz.WizardStep(
559                     # ID where the value will be stored
560                     id="ri_app_domain",
561                     # Display name
562                     name=Bcolors.HEADER+" What is the application domain ? (Please type number associated with type in 'help') "+Bcolors.ENDC,
563                     # Help message
564                     help=" 1:Computer Resource Mgmt.\n 2:Robotics\n 3:Traffic-Control\n 4:Reccommenders\n 5:Autonomous Vehicles\n 6:Games\n 7:Chemistry\n 8:Others\n",
565                     validators=(wiz.required_validator),
566                     default='1'
567                 ),
568             )
569         )
570
571     ############### All the Run Operations ######################
572     def run_mainwiz(self):
573         """
574         Run the Main Wizard
575         """
576         self.main_wizard_l1()
577         self.main_l1_values = self.wiz_main_l1.run(self.shell)
578         if self.main_l1_values['data_availability'].lower() == 'y':
579             self.main_wizard_l2_b()
580             self.main_l2b_values = self.wiz_main_l2_b.run(self.shell)
581             if self.main_l2b_values['data_labe'].lower() == 'y':
582                 self.supervised = True
583             else:
584                 self.unsupervised = True
585             if self.main_l2b_values['data_programmability'].lower() == 'y':
586                 print(Bcolors.FAIL+"ML is not required - Please consider alternate approaches\n"+Bcolors.ENDC)
587             else:
588                 self.main_wizard_l3()
589                 self.main_l3_values = self.wiz_main_l3.run(self.shell)
590                 if self.main_l3_values['data_knowledge'].lower() == 'y':
591                     print(Bcolors.OKGREEN+"Looks like you need ML, let's continue"+Bcolors.ENDC)
592                     self.ml_needed = True
593                 else:
594                     self.main_wizard_l4()
595                     self.main_l4_values = self.wiz_main_l4.run(self.shell)
596                     if self.main_l4_values['data_pattern'].lower() == 'y':
597                         print(Bcolors.OKGREEN+"Looks like you need ML, let's continue"+Bcolors.ENDC)
598                         self.ml_needed = True
599                     else:
600                         print(Bcolors.FAIL+"ML is not required - Please consider alternate approaches\n"+Bcolors.ENDC)
601         else:
602             self.main_wizard_l2_a()
603             self.main_l2a_values = self.wiz_main_l2_a.run(self.shell)
604             if self.main_l2a_values['data_creativity'].lower() == 'y':
605                 print(Bcolors.OKGREEN+"Looks like you need ML, let's continue"+Bcolors.ENDC)
606                 self.ml_needed = True
607                 self.reinforcement = True
608             else:
609                 print(Bcolors.FAIL+"ML is not required - Please consider alternate approaches\n"+Bcolors.ENDC)
610
611     def run_generic_wizard(self):
612         """
613         Run Generic Wizard
614         """
615         self.gen_wizard()
616         self.gen_values = self.wiz_generic.run(self.shell)
617
618     def run_unsupervised_wizard(self):
619         """
620         Run UnSupervised Learning Wizard.
621         """
622         self.unsupervised_wizard()
623         self.unsup_values = self.wiz_unsupervised.run(self.shell)
624
625     def run_reinforcement_wizard(self):
626         """
627         Run Reinforced Learning Wizard
628         """
629         self.reinforcement_wizard()
630         self.ri_values = self.wiz_reinforcement.run(self.shell)
631
632     def decide_unsupervised(self):
633         """
634         Decide which Unsupervised-learning to use
635         """
636         repro = False
637         clus_prob = False
638         if int(self.unsup_values['unsup_goal']) == 1:
639             # Clustering
640             if 'high' in self.data_size:
641                 if not self.reproducibility:
642                     clus_prob = True
643                 else:
644                     repro = True
645             else:
646                 if 'y' in self.unsup_values['unsup_clus_dv'].tolower():
647                     if 'y' in self.unsup_values['unsup_clus_groups'].tolower():
648                         clus_prob = True
649                     else:
650                         print("Unsupervised Learning model to consider: Hierarchical Clustering")
651                         return
652                 else:
653                     repro = True
654             if repro:
655                 if 'y' in self.unsup_values['unsup_clus_outliers'].tolower():
656                     print("Unsupervised Learning model to consider: Hierarchical Clustering")
657                 else:
658                     print("Unsupervised Learning model to consider: DBSCAN")
659                 return
660             if clus_prob:
661                 if 'y' in self.gen_values['data_output_prob'].tolower():
662                     print("Unsupervised Learning model to consider: Gaussian Mixture")
663                 else:
664                     print("Unsupervised Learning model to consider: KMeans")
665                 return
666         elif int(self.unsup_values['unsup_goal']) == 2:
667             # Dimensionality Reduction
668             if 'y' in self.unsup_values['unsup_dr_topic_mod'].tolower():
669                 if 'y' in self.gen_values['data_output_prob'].tolower():
670                     print("Unsupervised Learning model to consider: SVD")
671                 else:
672                     print("Unsupervised Learning model to consider: LDA")
673             else:
674                 print("Unsupervised Learning model to consider: PCA")
675         else:
676             print("Sorry. We need to discuss, please connect with Anuket Thoth Project <sridhar.rao@spirent.com>")
677
678     def decide_reinforcement(self):
679         """
680         Decide which reinforement learning to use.
681         """
682         if (int(self.gen_values['data_type_output']) == 2 or
683                 'y' in self.ri_values['ri_model_preference'].tolower()):
684             # Model Bsaed
685             if 'y' in self.ri_values['ri_model_availability'].tolower():
686                 print("Reinforcement Learning model to consider - AlphaZero")
687             else:
688                 print("Reinforcement Learning models to consider - World Models, I2A, MBMF, and MBVE")
689         elif 'n' in self.ri_values['ri_model_preference'].tolower():
690             # Model-Free based approach.
691             if 'y' not in self.ri_values['ri_modelfree_value'].tolower():
692                 print("Reinforcement Learning models to consider: Policy Gradient and Actor Critic")
693             else:
694                 if 'y' in self.ri_values['ri_modelfree_value_state'].tolower():
695                     print("Reinforcement Learning models to consider - Monte Carlo, TD(0), and TD(Lambda)")
696                 else:
697                     print("Reinforcement Learning models to consider - SARSA, QLearning, Deep Queue Nets")
698         else:
699             # Default
700             print("Sorry. We need to discuss, please connect with Anuket Thoth Project <sridhar.rao@spirent.com>")
701
702     def perform_inference(self):
703         """
704         Perform Inferences. Used across all 3 types.
705         """
706         # Decide whether data is Low or High
707         self.data_size = 'unknown'
708         if ('k' in self.gen_values['data_size_bytes'].lower() or
709                 't' in self.gen_values['data_size_samples']):
710             self.data_size = 'low'
711
712         if int(self.gen_values['metric_interpretability']) >= 3 :
713             self.interpretability = True
714         if int(self.gen_values['metric_speed']) >= 3 :
715             self.faster = True
716         if int(self.gen_values['metric_reproducibility']) >= 3 :
717             self.reproducibility = True
718
719         # Decide Features relative to Data (ftod_ratio) - high/low
720         if ('k' in self.gen_values['data_size_bytes'].lower() or
721                 't' in self.gen_values['data_size_samples']):
722             if int(self.gen_values['data_features_count']) > 50:
723                 self.ftod_ratio = 'high'
724         elif ('m' in self.gen_values['data_size_bytes'].lower() or
725                 'm' in self.gen_values['data_size_samples']):
726             if int(self.gen_values['data_features_count']) > 5000:
727                 self.ftod_ratio = 'high'
728         else:
729             if int(self.gen_values['data_features_count']) > 500000:
730                 self.ftod_ratio = 'high'
731
732
733     def decide_supervised(self):
734         """
735         Decide which Supervised learning to use.
736         """
737         if 'high' in self.data_size:
738             # Cover: DT, RF, RNN, CNN, ANN and Naive Bayes
739             if self.interpretability:
740                 if self.faster:
741                     print("Supervised Learning model to consider  - Decision Tree")
742                 else:
743                     print("Supervised Learning model to consider  - Random Forest")
744             else:
745                 if int(self.gen_values['data_column']) == 3:
746                     print("Supervised Learning model to consider  - RNN")
747                 elif (int(self.gen_values['data_column']) == 2 and
748                         int(self.gen_values['data_signal_type']) == 1):
749                     print("Supervised Learning model to consider  - CNN")
750                 elif (int(self.gen_values['data_column']) == 2 and
751                         (int(self.gen_values['data_signal_type']) == 2 or
752                             int(self.gen_values['data_signal_type']) == 3)):
753                     if 'y' in self.gen_values['data_output_prob'].tolower():
754                         print("Supervised Learning model to consider  - Naive Bayes")
755                     else:
756                         print("Supervised Learning model to consider  - ANN")
757                 else:
758                     print("Supervised model to consider  Learning - ANN")
759         elif 'low' in self.data_size:
760             from_b = False
761             # Cover: Regressions
762             if 'high' in self.ftod_ratio:
763                 from_b = True
764             else:
765                 print("Supervised Learning model to consider  - SVN with Gaussian Kernel")
766                 return
767             if int(self.gen_values['data_type_output']) != 2:
768                 from_b = True
769             else:
770                 if 'y' in self.gen_values['data_io_relation'].tolower():
771                     print("Supervised Learning model to consider  - Linear Regression or Linear SVM")
772                 else:
773                     print("Supervised Learning model to consider  - Polynomial Regression or nonLinear SVM")
774                 return
775             if from_b:
776                 if int(self.gen_values['data_output_type']) == 4:
777                     if 'y' in self.gen_values['data_output_prob'].tolower():
778                         if 'y' in self.gen_values['data_cond_indep'].tolower():
779                             print("Supervised Learning model to consider  - Naive Bayes")
780                         else:
781                             if 'y' in self.gen_values['data_correlation'].tolower():
782                                 print("Supervised Learning model to consider  - LASSO or Ridge Regression")
783                             else:
784                                 print("Supervised Learning model to consider  - Logistic Regression")
785                     else:
786                         print("Supervised Learning model to consider  - Polynomial Regression or nonLinear SVM")
787
788                 else:
789                     print("Supervised Learning model to consider - KNN")
790         else:
791             # Default
792             print("Sorry. We need to discuss, please connect with Anuket Thoth Project <sridhar.rao@spirent.com>")
793
794     def ask_and_decide(self):
795         """
796         THe Main Engine
797         """
798         self.run_mainwiz()
799         if self.ml_needed:
800             self.run_generic_wizard()
801             if self.supervised:
802                 self.decide_supervised()
803             elif self.unsupervised:
804                 self.run_unsupervised_wizard()
805                 self.decide_unsupervised()
806             elif self.reinforcement:
807                 self.run_reinforcement_wizard()
808                 self.decide_reinforcement()
809
810
811 def signal_handler(signum, frame):
812     """
813     Signal Handler
814     """
815     print("\n You interrupted, No Suggestion will be provided!")
816     print(signum, frame)
817     sys.exit(0)
818
819 def main():
820     """
821     The Main Function
822     """
823     try:
824         algowiz = AlgoSelectorWizard()
825         algowiz.ask_and_decide()
826     except(KeyboardInterrupt, MemoryError):
827         print("Some Error Occured - No Suggestion can be provided")
828
829     print("Thanks for using the Algoselector-Wizard, " +
830             "Hope our suggestion will be useful")
831
832 if __name__ == "__main__":
833     signal.signal(signal.SIGINT, signal_handler)
834     main()