1 # Copyright 2022 Linux Foundation.
2 # srao@linuxfoundation.org
4 # Licensed under the Apache License, Version 2.0 (the "License");
5 # you may not use this file except in compliance with the License.
6 # You may obtain a copy of the License at
8 # http://www.apache.org/licenses/LICENSE-2.0
10 # Unless required by applicable law or agreed to in writing, software
11 # distributed under the License is distributed on an "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 # See the License for the specific language governing permissions and
14 # limitations under the License.
17 Tool to suggest which ML approach is most applicable to
18 a particular dataset and use case.
21 2. Add Informative data to the user.
22 3. Check for Size Entry - 1G/K ..
25 from __future__ import print_function
28 from pypsi import wizard as wiz
29 from pypsi.shell import Shell
31 # pylint: disable=line-too-long,too-few-public-methods,too-many-instance-attributes, too-many-nested-blocks, too-many-return-statements, too-many-branches
46 class AlgoSelectorWizard():
48 Class to create wizards
def __init__(self):
    """
    Perform Initialization.

    Creates the shell the wizards run in and resets every answer
    dictionary, wizard handle and inference flag to its starting value.
    """
    # NOTE(review): reconstructed -- every run_* method passes self.shell to
    # PromptWizard.run(), but no assignment is visible in the reviewed
    # listing. A default pypsi Shell is assumed; confirm.
    self.shell = Shell()

    # Set of all values from the user
    self.main_l1_values = {}
    self.main_l2a_values = {}
    self.main_l2b_values = {}
    self.main_l3_values = {}
    self.main_l4_values = {}
    self.unsup_values = {}
    # Assigned by run_reinforcement_wizard(), read by decide_reinforcement().
    self.ri_values = {}
    self.gen_choice_values = {}
    self.gen_metrics_values = {}
    self.gen_data_main_values = {}
    self.gen_data_text_values = {}
    self.gen_data_features_values = {}
    self.gen_data_signal_values = {}
    self.gen_about_data_basic_values = {}
    self.gen_about_data_adv_values = {}
    self.gen_about_data_output_values = {}

    # Wizard handles; each *_wizard() builder fills in its own slot.
    self.wiz_main_l1 = None
    self.wiz_main_l2_a = None
    self.wiz_main_l2_b = None
    self.wiz_main_l3 = None
    self.wiz_main_l4 = None
    self.wiz_generic = None
    self.wiz_generic_choice = None
    # Previously misspelled "wiz_geneirc_metric"; gen_metrics_wizard() and
    # run_generic_wizard() both use the name below.
    self.wiz_generic_metrics = None
    self.wiz_generic_data_main = None
    self.wiz_generic_data_signal = None
    self.wiz_generic_data_features = None
    self.wiz_generic_data_text = None
    self.wiz_generic_data_basic = None
    self.wiz_generic_data_adv = None
    self.wiz_generic_data_output = None
    self.wiz_unsupervised = None
    self.wiz_reinforcement = None

    # Outcome of the "do you need ML" questions.
    self.ml_needed = False
    self.supervised = False
    self.unsupervised = False
    self.reinforcement = False

    # Inference flags consumed by the decide_* methods.
    self.data_size = 'high'
    self.interpretability = False
    # NOTE(review): assumed counterpart of the metric_speed check in
    # perform_inference(); not visible in the reviewed listing -- confirm.
    self.speed = False
    self.ftod_ratio = 'low'
    self.reproducibility = False
105 ############# All the Wizards ##################################
107 ### GENERIC Wizards - Need for ML ##############################
# Builds the "Do you Need ML - Data Availability" prompt wizard and stores it
# in self.wiz_main_l1. The visible step stores its answer under
# "data_availability" and applies the required and boolean validators.
# NOTE(review): the inner numbering has gaps (e.g. 113 -> 116), so some
# arguments of this call are not shown here and were not reviewed.
108 def main_wizard_l1(self):
112 self.wiz_main_l1 = wiz.PromptWizard(
113 name=Bcolors.OKBLUE+"Do you Need ML - Data Availability"+Bcolors.ENDC,
116 # The list of input prompts to ask the user.
118 # ID where the value will be stored
119 id="data_availability",
121 name=Bcolors.HEADER+"Do you have access to data about different situations, or that describes a lot of examples of situations"+Bcolors.ENDC,
124 validators=(wiz.required_validator, wiz.boolean_validator),
# Builds the "Do you Need ML - Data Creation" prompt wizard and stores it in
# self.wiz_main_l2_a. The visible step stores its answer under
# "data_creativity"; run_mainwiz() reads that key to decide whether
# reinforcement learning applies.
# NOTE(review): the inner numbering has gaps, so some arguments of this call
# are not shown here and were not reviewed.
130 def main_wizard_l2_a(self):
134 self.wiz_main_l2_a = wiz.PromptWizard(
135 name=Bcolors.OKBLUE+"Do you Need ML - Data Creation"+Bcolors.ENDC,
138 # The list of input prompts to ask the user.
140 # ID where the value will be stored
141 id="data_creativity",
143 name=Bcolors.HEADER+"Will a system be able to gather a lot of data by trying sequences of actions in many different situations and seeing the results"+Bcolors.ENDC,
146 validators=(wiz.required_validator, wiz.boolean_validator),
# Builds the "Do you Need ML - Data Programmability" prompt wizard and stores
# it in self.wiz_main_l2_b. Two yes/no steps are visible: one about labelled
# data and one stored under "data_programmability".
# NOTE(review): the id of the first step is not visible in this listing;
# run_mainwiz() reads the key 'data_label', so that is presumably it -- confirm.
# NOTE(review): `label` holds the description of a data label that the first
# prompt refers to ("Type help for description of label"); where it is passed
# is not visible here.
152 def main_wizard_l2_b(self):
156 label = """ One or more meaningful and informative 'tag' to provide context so that a machine learning model can learn from it. For example, labels might indicate whether a photo contains a bird or car, which words were uttered in an audio recording, or if an x-ray contains a tumor. Data labeling is required for a variety of use cases including computer vision, natural language processing, and speech recognition."""
157 self.wiz_main_l2_b = wiz.PromptWizard(
158 name=Bcolors.OKBLUE+"Do you Need ML - Data Programmability"+Bcolors.ENDC,
161 # The list of input prompts to ask the user.
163 # ID where the value will be stored
166 name=Bcolors.HEADER+" Do you have Labelled data? (Type Y/N - Yes/No). Type help for description of label. "+Bcolors.ENDC,
169 validators=(wiz.required_validator, wiz.boolean_validator),
173 # ID where the value will be stored
174 id="data_programmability",
176 name=Bcolors.HEADER+"Can a program or set of rules decide what actions to take based on the data you have about the situations"+Bcolors.ENDC,
179 validators=(wiz.required_validator, wiz.boolean_validator),
# Builds the "Do you Need ML - Data Knowledge" prompt wizard and stores it in
# self.wiz_main_l3. One required yes/no step is visible.
# NOTE(review): the step id is not visible in this listing; run_mainwiz()
# reads the key 'data_knowledge', so that is presumably it -- confirm.
186 def main_wizard_l3(self):
190 self.wiz_main_l3 = wiz.PromptWizard(
191 name=Bcolors.OKBLUE+"Do you Need ML - Data Knowledge"+Bcolors.ENDC,
194 # The list of input prompts to ask the user.
196 # ID where the value will be stored
199 name=Bcolors.HEADER+"Could a knowledgeable human decide what actions to take based on the data you have about the situations"+Bcolors.ENDC,
202 validators=(wiz.required_validator, wiz.boolean_validator),
# Builds the "Do you Need ML - Data Pattern" prompt wizard and stores it in
# self.wiz_main_l4. One required yes/no step is visible.
# NOTE(review): the step id is not visible in this listing; run_mainwiz()
# reads the key 'data_pattern', so that is presumably it -- confirm.
208 def main_wizard_l4(self):
212 self.wiz_main_l4 = wiz.PromptWizard(
213 name=Bcolors.OKBLUE+"Do you Need ML - Data Pattern"+Bcolors.ENDC,
216 # The list of input prompts to ask the user.
218 # ID where the value will be stored
221 name=Bcolors.HEADER+"Could there be patterns in these situations that the humans haven't recognized before"+Bcolors.ENDC,
223 help="Y/N - Yes/No.",
224 validators=(wiz.required_validator, wiz.boolean_validator),
229 ### GENERIC Wizards - GOAL, METRICS, DATA ##############################
# Builds the "Understanding Goal, and Preferences" wizard and stores it in
# self.wiz_generic_choice. The visible steps ask for the goal
# (Predict/Describe/Explore) and then, as yes/no questions, which follow-up
# wizards the user can answer: metrics, input data type, basic data
# information, advanced data information and output information.
# run_generic_wizard() branches on those *_pref answers.
# NOTE(review): the ids of the goal step and of the input-data-type step are
# not visible in this listing; run_generic_wizard() reads 'data_main' for the
# latter -- confirm both.
# NOTE(review): the choice list that starts with 'Predict' continues on lines
# that are not shown here.
230 def gen_choice_wizard(self):
232 Generic Wizard - Goal, metrics, data
234 self.wiz_generic_choice = wiz.PromptWizard(
235 name=Bcolors.OKBLUE+"Understanding Goal, and Preferences"+Bcolors.ENDC,
238 # The list of input prompts to ask the user.
240 # ID where the value will be stored
243 name=Bcolors.HEADER+" What is your goal with the data? Predict, Describe or Explore"+Bcolors.ENDC,
245 help="Enter one of Predict/Describe/Explore",
246 validators=(wiz.required_validator, wiz.choice_validator(['Predict',
255 # ID where the value will be stored
256 id="data_metrics_pref",
258 name=Bcolors.HEADER+" Do you know which metrics (speed, accuracy, etc.) are more important for you? "+Bcolors.ENDC,
261 validators=(wiz.required_validator, wiz.boolean_validator),
265 # ID where the value will be stored
268 name=Bcolors.HEADER+" Do you know about the input data type (If its signal/features/text) ? "+Bcolors.ENDC,
271 validators=(wiz.required_validator, wiz.boolean_validator),
275 # ID where the value will be stored
276 id="data_databasic_pref",
278 name=Bcolors.HEADER+" Do you have basic information (size, count, etc.) about the input data? "+Bcolors.ENDC,
281 validators=(wiz.required_validator, wiz.boolean_validator),
285 # ID where the value will be stored
286 id="data_dataadv_pref",
288 name=Bcolors.HEADER+" Do you have advanced information (distribution, relation, independency, etc.) about the input data? "+Bcolors.ENDC,
291 validators=(wiz.required_validator, wiz.boolean_validator),
295 # ID where the value will be stored
296 id="data_dataoutput_pref",
298 name=Bcolors.HEADER+" Do you have basic information (size, count, etc.) about the output? "+Bcolors.ENDC,
301 validators=(wiz.required_validator, wiz.boolean_validator),
# Builds the metrics wizard and stores it in self.wiz_generic_metrics. Each
# visible step asks how important one metric is on a 1-5 scale, enforced with
# wiz.int_validator(1, 5): accuracy, speed, interpretability, reproducibility
# and ease of implementation/maintenance.
# NOTE(review): the id of the 'Speed' step is not visible in this listing;
# perform_inference() reads 'metric_speed', so that is presumably it -- confirm.
# NOTE(review): the handle assigned here is spelled wiz_generic_metrics, while
# the slot declared with the other wizard handles is spelled
# wiz_geneirc_metric.
307 def gen_metrics_wizard(self):
309 Generic Wizard - Goal, metrics, data
311 self.wiz_generic_metrics = wiz.PromptWizard(
312 name=Bcolors.OKBLUE+"Understanding Goal, Metrics, Data and Output Type"+Bcolors.ENDC,
315 # The list of input prompts to ask the user.
317 # ID where the value will be stored
318 id="metric_accuracy",
320 name=Bcolors.HEADER+" How important the metric 'Accuracy' is for you? 1-5: 1- Least important 5- Most Important"+Bcolors.ENDC,
322 help="Enter 1-5: 1 being least important, and 5 being most important",
323 validators=(wiz.required_validator, wiz.int_validator(1, 5)),
327 # ID where the value will be stored
330 name=Bcolors.HEADER+" How important the metric 'Speed' is for you? 1-5: 1- Least important 5- Most Important"+Bcolors.ENDC,
332 help="Enter 1-5: 1 being least important, and 5 being most important",
333 validators=(wiz.required_validator, wiz.int_validator(1, 5)),
337 # ID where the value will be stored
338 id="metric_interpretability",
340 name=Bcolors.HEADER+" How important the metric 'Interpretability' is for you? 1-5: 1- Least important 5- Most Important"+Bcolors.ENDC,
342 help="Enter 1-5: 1 being least important, and 5 being most important",
343 validators=(wiz.required_validator, wiz.int_validator(1, 5)),
347 # ID where the value will be stored
348 id="metric_reproducibility",
350 name=Bcolors.HEADER+" How important the metric 'Reproducibility' is for you? 1-5: 1- Least important 5- Most Important"+Bcolors.ENDC,
352 help="Enter 1-5: 1 being least important, and 5 being most important",
353 validators=(wiz.required_validator, wiz.int_validator(1, 5)),
357 # ID where the value will be stored
358 id="metric_implementation",
360 name=Bcolors.HEADER+" How important the metric 'Ease of Implementation and Maintenance' is for you? 1-5: 1- Least important 5- Most Important"+Bcolors.ENDC,
362 help="Enter 1-5: 1 being least important, and 5 being most important",
363 validators=(wiz.required_validator, wiz.int_validator(1, 5)),
# Builds the wizard that asks what the data columns represent and stores it in
# self.wiz_generic_data_main. The answer is a number from 1 to 4
# (1 features, 2 signals, 3 text, 4 none of the above), as listed in the help
# text and enforced by wiz.int_validator(1, 4).
# NOTE(review): the step id is not visible in this listing;
# run_generic_wizard() reads 'data_column', so that is presumably it -- confirm.
369 def gen_data_main_wizard(self):
371 Generic Wizard - Goal, metrics, data
373 self.wiz_generic_data_main = wiz.PromptWizard(
374 name=Bcolors.OKBLUE+"Understanding Goal, and Preferences"+Bcolors.ENDC,
377 # The list of input prompts to ask the user.
379 # ID where the value will be stored
382 name=Bcolors.HEADER+" What does the data (columns) represent? Please type help and select the associated number"+Bcolors.ENDC,
384 help="1. Well Defined Features\n 2. Signals - Timeseries, pixels, etc\n 3. Text - Unstructured\n 4. None of the above\n",
385 validators=(wiz.required_validator, wiz.int_validator(1, 4)),
# Builds the signal-type wizard and stores it in self.wiz_generic_data_signal.
# The single visible step stores a number from 1 to 5 under "data_signal_type"
# (1 image, 2 audio, 3 timeseries, 4 none of the above, 5 not applicable).
# decide_supervised() compares this value against 1, 2 and 3.
# NOTE(review): the inner numbering has gaps, so some arguments of this call
# are not shown here and were not reviewed.
391 def gen_data_signal_wizard(self):
393 Generic Wizard - Goal, metrics, data
395 self.wiz_generic_data_signal = wiz.PromptWizard(
396 name=Bcolors.OKBLUE+"Understanding Goal, and Preferences"+Bcolors.ENDC,
399 # The list of input prompts to ask the user.
401 # ID where the value will be stored
402 id="data_signal_type",
404 name=Bcolors.HEADER+" If Signals, can you choose any one from the below list? Please type help for list "+Bcolors.ENDC,
406 help="1. Image\n 2. Audio\n 3. Timeseries\n 4. None of the above\n 5. Not Applicable\n ",
407 validators=(wiz.required_validator, wiz.int_validator(1, 5)),
# Builds the features wizard and stores it in self.wiz_generic_data_features.
# Two steps are visible: a yes/no question on whether the features are well
# defined, and "data_features_count", an integer between 1 and 100000.
# perform_inference() compares that count against size-dependent thresholds.
# NOTE(review): the id of the first step is not visible in this listing; the
# fallback answers in run_generic_wizard() use 'data_features' -- confirm.
413 def gen_data_features_wizard(self):
415 Generic Wizard - Goal, metrics, data
417 self.wiz_generic_data_features = wiz.PromptWizard(
418 name=Bcolors.OKBLUE+"Understanding Goal, and Preferences"+Bcolors.ENDC,
421 # The list of input prompts to ask the user.
423 # ID where the value will be stored
426 name=Bcolors.HEADER+" If features, are they well defined? i.e., are all the variables well understood? "+Bcolors.ENDC,
429 validators=(wiz.required_validator, wiz.boolean_validator),
433 # ID where the value will be stored
434 id="data_features_count",
436 name=Bcolors.HEADER+" If features, How many are there? "+Bcolors.ENDC,
438 help="Number only - Approximate should be OK.",
439 validators=(wiz.required_validator, wiz.int_validator(1, 100000)),
# Builds the text-type wizard and stores it in self.wiz_generic_data_text.
# The single visible step takes a number from 1 to 8 selecting the kind of
# text, as listed in the help string.
# NOTE(review): the step id is not visible in this listing; the fallback
# answers in run_generic_wizard() use 'data_text_type' -- confirm.
445 def gen_data_text_wizard(self):
447 Generic Wizard - Goal, metrics, data
449 self.wiz_generic_data_text = wiz.PromptWizard(
450 name=Bcolors.OKBLUE+"Understanding Goal, and Preferences"+Bcolors.ENDC,
453 # The list of input prompts to ask the user.
455 # ID where the value will be stored
458 name=Bcolors.HEADER+" If Text, can you choose any one from the below list? Please type help for list"+Bcolors.ENDC,
460 help="1. Webpages\n 2. Emails\n 3. Social-Media Posts\n 4. Books\n 5. Formal Articles\n 6. Speech converted to text\n 7. None of the above\n 8. Not Applicable\n ",
461 validators=(wiz.required_validator, wiz.int_validator(1, 8)),
# Builds the "Understanding Basic Input Data Information" wizard and stores it
# in self.wiz_generic_data_basic. Visible steps: a yes/no question about
# missing values, "data_size_bytes" (number plus K/M/G unit) and
# "data_size_samples" (number plus T/M/B unit).
# NOTE(review): the id of the missing-values step is not visible in this
# listing; the fallback answers in run_generic_wizard() use 'data_missing'.
# NOTE(review): the two size steps only use required_validator, so the
# "number + unit" format described in their help text is not enforced.
468 def gen_about_data_basic_wizard(self):
470 Generic Wizard - Goal, metrics, data
472 self.wiz_generic_data_basic = wiz.PromptWizard(
473 name=Bcolors.OKBLUE+"Understanding Basic Input Data Information"+Bcolors.ENDC,
476 # The list of input prompts to ask the user.
478 # ID where the value will be stored
481 name=Bcolors.HEADER+" Are there any missing values in the data? "+Bcolors.ENDC,
484 validators=(wiz.required_validator, wiz.boolean_validator),
488 # ID where the value will be stored
489 id="data_size_bytes",
491 name=Bcolors.HEADER+" How big is the data in terms of size? (Use K/M/G Bytes unit) "+Bcolors.ENDC,
493 help="Number(integer) and unit: K for Kilo, M for Mega and G for Giga. Ex: 10G for 10 Giga bytes",
# NOTE(review): "(x)" without a trailing comma is not a tuple, so a bare
# function is passed here, unlike the two-element tuples used by other steps.
494 validators=(wiz.required_validator),
498 # ID where the value will be stored
499 id="data_size_samples",
501 name=Bcolors.HEADER+" How big is the data in terms of samples? (Use T/M/B Samples) "+Bcolors.ENDC,
503 help="Number(integer) and unit: T for Thousand, M for Million and B for Billion. Ex: 1M for 1 Million Samples",
# NOTE(review): same as above -- a bare function, not a one-element tuple.
504 validators=(wiz.required_validator),
# Builds the "Understanding Advanced Input Data Information" wizard and stores
# it in self.wiz_generic_data_adv. Four required yes/no steps are visible:
# "data_distribution", "data_io_relation", "data_correlation" and
# "data_cond_indep". decide_supervised() reads the last three.
# NOTE(review): the "data_correlation" prompt ends with "in your day?", which
# looks like a typo for "in your data?"; the help text also spells
# "lightning" as "lightining".
# NOTE(review): the "data_correlation" prompt asks whether there is NO high
# correlation, so a "yes" means low correlation -- check that
# decide_supervised() interprets the answer that way.
510 def gen_about_data_advanced_wizard(self):
512 Generic Wizard - Goal, metrics, data
514 self.wiz_generic_data_adv = wiz.PromptWizard(
515 name=Bcolors.OKBLUE+"Understanding Advanced Input Data Information"+Bcolors.ENDC,
518 # The list of input prompts to ask the user.
520 # ID where the value will be stored
521 id="data_distribution",
523 name=Bcolors.HEADER+" Are you aware of any 'Distribution' that is inherent to the data, we can take advantage of?"+Bcolors.ENDC,
526 validators=(wiz.required_validator, wiz.boolean_validator),
530 # ID where the value will be stored
531 id="data_io_relation",
533 name=Bcolors.HEADER+" Is the probability of 'Linear Relation' between input and the output is high?"+Bcolors.ENDC,
536 validators=(wiz.required_validator, wiz.boolean_validator),
540 # ID where the value will be stored
541 id="data_correlation",
543 name=Bcolors.HEADER+" Are you confident that there is NO high correlation among the independent variables in your day?"+Bcolors.ENDC,
545 help="Y/N/ - Yes/No ",
546 validators=(wiz.required_validator, wiz.boolean_validator),
550 # ID where the value will be stored
551 id="data_cond_indep",
553 name=Bcolors.HEADER+" Are you confident that the variables are conditionally independent?"+Bcolors.ENDC,
555 help="Y/N/. If probability that it rains given lightining and thunder is same as probability that it rains given lightining, then rain and thunder are conditionally independent",
556 validators=(wiz.required_validator, wiz.boolean_validator),
# Builds the "Understanding Data Output" wizard and stores it in
# self.wiz_generic_data_output. Visible steps: "data_type_output", a number
# from 1 to 5 (1 numerical-discrete, 2 numerical-continuous, 3 ordinal,
# 4 categorical-binary, 5 categorical-multiclass), and "data_output_prob",
# a yes/no question on whether the output is a probability.
# NOTE(review): the key defined here is "data_type_output"; any reader using
# "data_output_type" would not find it.
562 def gen_about_output_wizard(self):
564 Generic Wizard - Goal, metrics, data
566 self.wiz_generic_data_output = wiz.PromptWizard(
567 name=Bcolors.OKBLUE+"Understanding Data Output"+Bcolors.ENDC,
570 # The list of input prompts to ask the user.
572 # ID where the value will be stored
573 id="data_type_output",
575 name=Bcolors.HEADER+" What is the expected output data type ? (Please type number associated with type in 'help') "+Bcolors.ENDC,
577 help=" 1:Numerical-Discrete\n 2:Numerical-Continuous\n 3:Ordinal\n 4:Categorical-Binary\n 5:Categorical-Multiclass",
578 validators=(wiz.required_validator, wiz.int_validator(1, 5)),
582 # ID where the value will be stored
583 id="data_output_prob",
585 name=Bcolors.HEADER+" Is the expected output data a probability value ? "+Bcolors.ENDC,
588 validators=(wiz.required_validator, wiz.boolean_validator),
# Builds the unsupervised-learning wizard and stores it in
# self.wiz_unsupervised. Visible steps: the main goal as a number from 1 to 3
# (1 clustering, 2 dimensionality reduction, 3 others), followed by four
# Y/N/NA questions on topic modelling, density variations, outliers and
# whether the number of groups is known.
# The Y/N/NA steps use wiz.choice_validator with every upper/lower-case
# spelling listed explicitly, so their answers are stored as strings.
# NOTE(review): two step ids are not visible in this listing;
# decide_unsupervised() reads 'unsup_goal' and 'unsup_clus_dv' -- confirm.
# NOTE(review): the prompts say "Please type NA is you are not sure"; "is"
# looks like a typo for "if".
595 def unsupervised_wizard(self):
597 The Un-Supervized Learning Wizard
599 self.wiz_unsupervised = wiz.PromptWizard(
600 name=Bcolors.OKBLUE+"Understanding Goal, Metrics, Data and Output Type"+Bcolors.ENDC,
603 # The list of input prompts to ask the user.
605 # ID where the value will be stored
608 name=Bcolors.HEADER+" What is the main goal? (Please type number associated with type in 'help')"+Bcolors.ENDC,
610 help="1: Explore Similar Groups (clustering) \n 2: Perform Dimensionality Reduction\n 3: Others\n",
611 validators=(wiz.required_validator, wiz.int_validator(1, 3)),
615 # ID where the value will be stored
616 id="unsup_dr_topic_mod",
618 name=Bcolors.HEADER+" If dimensionality reduction, do you prefer topic modelling ? (Please type NA is you are not sure)"+Bcolors.ENDC,
621 validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na',
622 'y','n','na','nA'])),
626 # ID where the value will be stored
629 name=Bcolors.HEADER+" Are you aware of density variations in your data ? (Please type NA is you are not sure)"+Bcolors.ENDC,
632 validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na',
633 'y','n','na','nA'])),
637 # ID where the value will be stored
638 id="unsup_clus_outliers",
640 name=Bcolors.HEADER+" Are there too many outliers in your data ? (Please type NA is you are not sure)"+Bcolors.ENDC,
643 validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na',
644 'y','n','na','nA'])),
648 # ID where the value will be stored
649 id="unsup_clus_groups",
651 name=Bcolors.HEADER+" If clustering, do you know how many groups to form? (Please type NA is you are not sure)"+Bcolors.ENDC,
654 validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na',
655 'y','n','na','nA'])),
# Builds the "Reinforcement Specific" wizard and stores it in
# self.wiz_reinforcement. Visible steps: a first step that points the user to
# a reference diagram via "help", four Y/N/NA questions
# ("ri_model_preference", "ri_model_availability", "ri_modelfree_value",
# "ri_modelfree_value_state") and an application-domain step taking a number
# from 1 to 8.
# NOTE(review): only fragments of the agent/environment diagram text are
# visible in this listing, and the ids of the first and last steps are not
# shown.
# NOTE(review): the first step passes "(wiz.required_validator)", which is a
# bare function rather than a one-element tuple, unlike the other steps.
# NOTE(review): the help text of the domain step spells "Recommenders" as
# "Reccommenders".
662 def reinforcement_wizard(self):
664 The Reinforced Learning Wizard
668 |-------| Agent | Action
674 | |----|Environment| |
678 self.wiz_reinforcement = wiz.PromptWizard(
679 name=Bcolors.OKBLUE+"Reinforcement Specific"+Bcolors.ENDC,
682 # The list of input prompts to ask the user.
684 # ID where the value will be stored
687 name=Bcolors.HEADER+" Type help for reference diagram for reinforcement-learning"+Bcolors.ENDC,
690 validators=(wiz.required_validator),
691 default='Type Help or Press Enter'
694 # ID where the value will be stored
695 id="ri_model_preference",
697 name=Bcolors.HEADER+" Do you prefer model-based approach? (Type NA if you are not sure) "+Bcolors.ENDC,
700 validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na',
701 'y','n','na','nA'])),
705 # ID where the value will be stored
706 id="ri_model_availability",
708 name=Bcolors.HEADER+" Do you have a model for model-based approach? (Type NA if not applicable) "+Bcolors.ENDC,
711 validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na',
712 'y','n','na','nA'])),
716 # ID where the value will be stored
717 id="ri_modelfree_value",
719 name=Bcolors.HEADER+" In Model-Free approach, do you prefer value-based approach? (Type NA if not applicable) "+Bcolors.ENDC,
722 validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na',
723 'y','n','na','nA'])),
727 # ID where the value will be stored
728 id="ri_modelfree_value_state",
730 name=Bcolors.HEADER+" In Model-Free Value-Based approach, do you prefer state-only model? (Type NA if not applicable) "+Bcolors.ENDC,
733 validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na',
734 'y','n','na','nA'])),
738 # ID where the value will be stored
741 name=Bcolors.HEADER+" What is the application domain ? (Please type number associated with type in 'help') "+Bcolors.ENDC,
743 help=" 1:Computer Resource Mgmt.\n 2:Robotics\n 3:Traffic-Control\n 4:Reccommenders\n 5:Autonomous Vehicles\n 6:Games\n 7:Chemistry\n 8:Others\n",
744 validators=(wiz.required_validator, wiz.int_validator(1, 8)),
750 ############### All the Run Operations ######################
def run_mainwiz(self):
    """
    Run the "do you need ML" wizards.

    Walks the L1 -> L2 -> L3 -> L4 questions, stopping as soon as the
    outcome is known, and records the result in ml_needed, supervised,
    unsupervised and reinforcement.
    """
    not_required = (Bcolors.FAIL +
                    "ML is not required - Please consider alternate approaches\n" +
                    Bcolors.ENDC)
    required = (Bcolors.OKGREEN +
                "Looks like you need ML, let's continue" +
                Bcolors.ENDC)

    self.main_wizard_l1()
    self.main_l1_values = self.wiz_main_l1.run(self.shell)

    if not self.main_l1_values['data_availability']:
        # No data at hand: ML only helps if the system can generate its own.
        self.main_wizard_l2_a()
        self.main_l2a_values = self.wiz_main_l2_a.run(self.shell)
        if self.main_l2a_values['data_creativity']:
            print(required)
            self.ml_needed = True
            self.reinforcement = True
        else:
            print(not_required)
        return

    self.main_wizard_l2_b()
    self.main_l2b_values = self.wiz_main_l2_b.run(self.shell)
    if self.main_l2b_values['data_label']:
        self.supervised = True
    else:
        self.unsupervised = True

    # A plain program or rule set is enough: no ML needed.
    if self.main_l2b_values['data_programmability']:
        print(not_required)
        return

    self.main_wizard_l3()
    self.main_l3_values = self.wiz_main_l3.run(self.shell)
    if self.main_l3_values['data_knowledge']:
        print(required)
        self.ml_needed = True
        return

    self.main_wizard_l4()
    self.main_l4_values = self.wiz_main_l4.run(self.shell)
    if self.main_l4_values['data_pattern']:
        print(required)
        self.ml_needed = True
    else:
        print(not_required)
def run_generic_wizard(self):
    """
    Run the generic wizards: goal, metrics, data and output.

    The choice wizard runs first; each follow-up wizard runs only when the
    user said they can answer it. Declined follow-ups get fixed fallback
    answers so that later stages always find the keys they read. The one
    exception is the metrics wizard: when it is declined,
    gen_metrics_values is left untouched.
    """
    self.gen_choice_wizard()
    self.gen_choice_values = self.wiz_generic_choice.run(self.shell)
    choices = self.gen_choice_values

    if choices['data_metrics_pref']:
        self.gen_metrics_wizard()
        self.gen_metrics_values = self.wiz_generic_metrics.run(self.shell)

    # Fallback answers for the three data-type follow-ups. At most one of
    # them is replaced by real answers below. They are set up front so the
    # "unknown data type" branch also leaves them populated; previously it
    # defaulted data_column to '1' (features) but left these dicts empty.
    self.gen_data_text_values = {'data_text_type': '3'}
    self.gen_data_features_values = {'data_features': 'Y',
                                     'data_features_count': '10'}
    self.gen_data_signal_values = {'data_signal_type': '1'}

    if choices['data_main']:
        self.gen_data_main_wizard()
        self.gen_data_main_values = self.wiz_generic_data_main.run(self.shell)
        column = int(self.gen_data_main_values['data_column'])
        if column == 3:
            self.gen_data_text_wizard()
            self.gen_data_text_values = self.wiz_generic_data_text.run(self.shell)
        elif column == 1:
            self.gen_data_features_wizard()
            self.gen_data_features_values = self.wiz_generic_data_features.run(self.shell)
        elif column == 2:
            self.gen_data_signal_wizard()
            self.gen_data_signal_values = self.wiz_generic_data_signal.run(self.shell)
    else:
        self.gen_data_main_values = {'data_column': '1'}
        print("Unknown Data Type")

    if choices['data_databasic_pref']:
        self.gen_about_data_basic_wizard()
        self.gen_about_data_basic_values = self.wiz_generic_data_basic.run(self.shell)
    else:
        self.gen_about_data_basic_values = {'data_missing': 'N',
                                            'data_size_bytes': '1G',
                                            'data_size_samples': '1M'}

    if choices['data_dataadv_pref']:
        self.gen_about_data_advanced_wizard()
        self.gen_about_data_adv_values = self.wiz_generic_data_adv.run(self.shell)
    else:
        self.gen_about_data_adv_values = {'data_distribution': 'N',
                                          'data_io_relation': 'N',
                                          'data_correlation': 'N',
                                          'data_cond_indep': 'N'}

    if choices['data_dataoutput_pref']:
        self.gen_about_output_wizard()
        self.gen_about_data_output_values = self.wiz_generic_data_output.run(self.shell)
    else:
        self.gen_about_data_output_values = {'data_type_output': '1',
                                             'data_output_prob': 'N'}
def run_unsupervised_wizard(self):
    """
    Build the unsupervised-learning wizard, run it in the shell and keep
    the answers in unsup_values.
    """
    self.unsupervised_wizard()
    wizard = self.wiz_unsupervised
    answers = wizard.run(self.shell)
    self.unsup_values = answers
def run_reinforcement_wizard(self):
    """
    Build the reinforcement-learning wizard, run it in the shell and keep
    the answers in ri_values.
    """
    self.reinforcement_wizard()
    wizard = self.wiz_reinforcement
    answers = wizard.run(self.shell)
    self.ri_values = answers
# Prints a suggested unsupervised model. Inputs read here: the 'unsup_goal',
# 'unsup_clus_dv', 'unsup_clus_groups', 'unsup_clus_outliers' and
# 'unsup_dr_topic_mod' answers in self.unsup_values, self.data_size,
# self.reproducibility and the 'data_output_prob' output answer.
# Goal 1 is the clustering path, goal 2 the dimensionality-reduction path;
# the final message asks the user to get in touch.
# NOTE(review): the inner numbering has gaps (e.g. 868 -> 873), so the
# else/elif structure and part of the clustering path are not visible in this
# listing; the branch nesting could not be reviewed.
# NOTE(review): the contact address printed here differs from the one printed
# by decide_reinforcement().
859 def decide_unsupervised(self):
861 Decide which Unsupervised-learning to use
865 if int(self.unsup_values['unsup_goal']) == 1:
867 if 'high' in self.data_size:
868 if not self.reproducibility:
873 if 'y' in self.unsup_values['unsup_clus_dv'].lower():
874 if 'y' in self.unsup_values['unsup_clus_groups'].lower():
877 print("Unsupervised Learning model to consider: Hierarchical Clustering")
882 if 'y' in self.unsup_values['unsup_clus_outliers'].lower():
883 print("Unsupervised Learning model to consider: Hierarchical Clustering")
885 print("Unsupervised Learning model to consider: DBSCAN")
888 if 'y' in self.gen_about_data_output_values['data_output_prob'].lower():
889 print("Unsupervised Learning model to consider: Gaussian Mixture")
891 print("Unsupervised Learning model to consider: KMeans")
893 elif int(self.unsup_values['unsup_goal']) == 2:
894 # Dimensionality Reduction
895 if 'y' in self.unsup_values['unsup_dr_topic_mod'].lower():
896 if 'y' in self.gen_about_data_output_values['data_output_prob'].lower():
897 print("Unsupervised Learning model to consider: SVD")
899 print("Unsupervised Learning model to consider: LDA")
901 print("Unsupervised Learning model to consider: PCA")
903 print("Sorry. We need to discuss, please connect with Anuket Thoth Project <sridhar.rao@spirent.com>")
def decide_reinforcement(self):
    """
    Decide which reinforcement learning to use.

    Prints one suggestion based on the expected output type and the
    answers collected by the reinforcement wizard.
    """
    answers = self.ri_values

    def contains(letter, key):
        # Substring test on the lower-cased answer, so 'NA' also matches 'n'.
        return letter in answers[key].lower()

    continuous_output = int(self.gen_about_data_output_values['data_type_output']) == 2

    # Model-based approach.
    if continuous_output or contains('y', 'ri_model_preference'):
        if contains('y', 'ri_model_availability'):
            print("Reinforcement Learning model to consider - AlphaZero")
        else:
            print("Reinforcement Learning models to consider - World Models, I2A, MBMF, and MBVE")
        return

    if not contains('n', 'ri_model_preference'):
        print("Sorry. We need to discuss, please connect with Anuket Thoth Project <srao@linuxfoundation.org>")
        return

    # Model-Free based approach.
    if not contains('y', 'ri_modelfree_value'):
        print("Reinforcement Learning models to consider: Policy Gradient and Actor Critic")
        return

    if contains('y', 'ri_modelfree_value_state'):
        print("Reinforcement Learning models to consider - Monte Carlo, TD(0), and TD(Lambda)")
    else:
        print("Reinforcement Learning models to consider - SARSA, QLearning, Deep Queue Nets")
def perform_inference(self):
    """
    Perform Inferences. Used across all 3 types.

    Reads the collected answers and updates data_size, interpretability,
    speed, reproducibility and ftod_ratio on the instance. Answers that
    were never collected are skipped instead of raising KeyError.
    """
    basic = self.gen_about_data_basic_values
    # Units are compared case-insensitively: the prompts ask for upper-case
    # K/M/G and T/M/B, and the built-in fallback answers are '1G' / '1M'.
    # The earlier case-sensitive test on the sample count never matched them.
    size_bytes = str(basic.get('data_size_bytes', '')).lower()
    size_samples = str(basic.get('data_size_samples', '')).lower()
    small = 'k' in size_bytes or 't' in size_samples
    medium = 'm' in size_bytes or 'm' in size_samples
    large = 'g' in size_bytes or 'b' in size_samples

    # Decide whether data is Low or High.
    # NOTE(review): 'high' was never assigned here before, so any size that
    # was not K/T ended up 'unknown'. Treating M/G/B as 'high' is this
    # review's reading of the comment above -- confirm the intended cut-off.
    if small:
        self.data_size = 'low'
    elif medium or large:
        self.data_size = 'high'
    else:
        self.data_size = 'unknown'

    # A rating of 3 or more (on the 1-5 scale) marks the metric as important.
    metrics = self.gen_metrics_values
    if int(metrics.get('metric_interpretability', 0)) >= 3:
        self.interpretability = True
    if int(metrics.get('metric_speed', 0)) >= 3:
        # NOTE(review): the statement guarded by this test is not visible in
        # the reviewed listing; setting a `speed` flag is assumed -- confirm.
        self.speed = True
    if int(metrics.get('metric_reproducibility', 0)) >= 3:
        self.reproducibility = True

    # Decide Features relative to Data (ftod_ratio) - high/low.
    # The more data there is, the more features it takes to call it 'high'.
    if small:
        threshold = 50
    elif medium:
        threshold = 5000
    else:
        threshold = 500000
    feature_count = int(self.gen_data_features_values.get('data_features_count', 0))
    if feature_count > threshold:
        self.ftod_ratio = 'high'
def decide_supervised(self):
    """
    Decide which Supervised learning to use.

    Prints one suggestion chosen from data_size, interpretability,
    ftod_ratio and the data/output answers collected by the generic
    wizards.

    NOTE(review): the branch nesting was rebuilt from a listing in which
    the `else:` lines were not visible. Check it against the full file
    before merging.
    """
    output = self.gen_about_data_output_values
    advanced = self.gen_about_data_adv_values
    suggest = "Supervised Learning model to consider - "

    def said_yes(values, key):
        return 'y' in values[key].lower()

    if 'high' in self.data_size:
        # Cover: DT, RF, RNN, CNN, ANN and Naive Bayes
        if self.interpretability:
            # NOTE(review): the condition separating Decision Tree from
            # Random Forest is not visible in the reviewed listing; a
            # speed preference is assumed -- confirm.
            if getattr(self, 'speed', False):
                print(suggest + "Decision Tree")
            else:
                print(suggest + "Random Forest")
            return

        column = int(self.gen_data_main_values['data_column'])
        if column == 3:
            print(suggest + "RNN")
            return
        if column == 2:
            signal_type = int(self.gen_data_signal_values['data_signal_type'])
            if signal_type == 1:
                print(suggest + "CNN")
                return
            if signal_type in (2, 3):
                if said_yes(output, 'data_output_prob'):
                    print(suggest + "Naive Bayes")
                else:
                    print(suggest + "ANN")
                return
        # Message previously read "Supervised model to consider Learning".
        print(suggest + "ANN")
    elif 'low' in self.data_size:
        output_type = int(output['data_type_output'])
        if 'high' in self.ftod_ratio:
            # Message previously said "SVN"; the model meant is an SVM.
            print(suggest + "SVM with Gaussian Kernel")
        elif output_type != 2:
            # NOTE(review): as rebuilt, every output type except 2
            # (numerical-continuous) takes this regression branch, so the
            # binary branch below is never reached. Kept as found --
            # confirm the intended comparison.
            if said_yes(advanced, 'data_io_relation'):
                print(suggest + "Linear Regression or Linear SVM")
            else:
                print(suggest + "Polynomial Regression or nonLinear SVM")
        elif output_type == 4:
            # This test used to read the key 'data_output_type', which no
            # wizard defines (the key is 'data_type_output'), so reaching
            # it raised KeyError.
            if said_yes(output, 'data_output_prob'):
                if said_yes(advanced, 'data_cond_indep'):
                    print(suggest + "Naive Bayes")
                elif said_yes(advanced, 'data_correlation'):
                    print(suggest + "LASSO or Ridge Regression")
                else:
                    print(suggest + "Logistic Regression")
            else:
                print(suggest + "Polynomial Regression or nonLinear SVM")
        else:
            print(suggest + "KNN")
    else:
        print("Sorry. We need to discuss, please connect with Anuket Thoth Project <sridhar.rao@spirent.com>")
# Top-level flow: runs the generic wizards, then dispatches on the learning
# type chosen earlier -- decide_supervised(), or the unsupervised or
# reinforcement wizard followed by its matching decide_* method.
# NOTE(review): the inner numbering jumps from 1021 to 1027 and skips 1028,
# so the statements before run_generic_wizard() and the first branch
# condition are not visible in this listing.
# NOTE(review): no visible line in this method calls perform_inference();
# if it is not called elsewhere, data_size, interpretability, reproducibility
# and ftod_ratio keep their initial values when the decide_* methods run.
1021 def ask_and_decide(self):
1027 self.run_generic_wizard()
1029 self.decide_supervised()
1030 elif self.unsupervised:
1031 self.run_unsupervised_wizard()
1032 self.decide_unsupervised()
1033 elif self.reinforcement:
1034 self.run_reinforcement_wizard()
1035 self.decide_reinforcement()
# Handler installed for SIGINT by the __main__ guard. It prints a notice that
# no suggestion will be provided, then the received signal number and frame.
# NOTE(review): the inner numbering continues past 1043 with lines not shown
# here, so whether the handler exits the process afterwards is not visible.
1038 def signal_handler(signum, frame):
1042 print("\n You interrupted, No Suggestion will be provided!")
1043 print(signum, frame)
# Program body: creates an AlgoSelectorWizard and runs ask_and_decide().
# KeyboardInterrupt and MemoryError are caught and reported with a generic
# message; a closing thank-you message is printed.
# NOTE(review): the enclosing `def` and `try:` lines, and whatever sits
# between the handler and the closing message (inner lines 1044-1050 and
# 1055), are not visible in this listing.
# NOTE(review): only KeyboardInterrupt and MemoryError are caught; any other
# exception raised by the wizard propagates.
1051 algowiz = AlgoSelectorWizard()
1052 algowiz.ask_and_decide()
1053 except(KeyboardInterrupt, MemoryError):
1054 print("Some Error Occured - No Suggestion can be provided")
1056 print("Thanks for using the Algoselector-Wizard, " +
1057 "Hope our suggestion will be useful")
# Script entry point: when the file is run directly, signal_handler is
# installed for SIGINT.
# NOTE(review): no `import signal` is visible among the imports shown in this
# listing, and no call that starts the program is visible after this line --
# confirm both exist in the full file.
1059 if __name__ == "__main__":
1060 signal.signal(signal.SIGINT, signal_handler)