1 # Copyright 2022 Linux Foundation.
2 # srao@linuxfoundation.org
4 # Licensed under the Apache License, Version 2.0 (the "License");
5 # you may not use this file except in compliance with the License.
6 # You may obtain a copy of the License at
8 # http://www.apache.org/licenses/LICENSE-2.0
10 # Unless required by applicable law or agreed to in writing, software
11 # distributed under the License is distributed on an "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 # See the License for the specific language governing permissions and
14 # limitations under the License.
17 Tool to suggest which ML approach is more applicable for
18 a particular data and usecase.
21 2. Add Informative data to the user.
22 3. Check for Size Entry - 1G/K ..
25 from __future__ import print_function
28 from pypsi import wizard as wiz
29 from pypsi.shell import Shell
31 # pylint: disable=line-too-long,too-few-public-methods,too-many-instance-attributes, too-many-nested-blocks, too-many-return-statements, too-many-branches
46 class AlgoSelectorWizard():
48 Class to create wizards
52 Perform Initialization.
# Answer containers: one per wizard, filled with whatever the wizard's
# run() call returns.
55 # Set of all values from the user
57 self.main_l1_values = {}
58 self.main_l2a_values = {}
59 self.main_l2b_values = {}
60 self.main_l3_values = {}
61 self.main_l4_values = {}
62 self.unsup_values = {}
65 self.gen_choice_values = {}
66 self.gen_metrics_values = {}
67 self.gen_data_main_values = {}
68 self.gen_data_text_values = {}
69 self.gen_data_features_values = {}
70 self.gen_data_signal_values = {}
71 self.gen_about_data_basic_values = {}
72 self.gen_about_data_adv_values = {}
73 self.gen_about_data_output_values = {}
# Wizard objects: created lazily by the *_wizard() builder methods.
77 self.wiz_main_l1 = None
78 self.wiz_main_l2_a = None
79 self.wiz_main_l2_b = None
80 self.wiz_main_l3 = None
81 self.wiz_main_l4 = None
82 self.wiz_generic = None
83 self.wiz_generic_choice = None
# NOTE(review): "wiz_geneirc_metric" looks like a misspelling. gen_metrics_wizard()
# assigns self.wiz_generic_metrics and run_generic_wizard() reads that name, so
# this placeholder is never the attribute that is actually used.
84 self.wiz_geneirc_metric = None
85 self.wiz_generic_data_main = None
86 self.wiz_generic_data_signal = None
87 self.wiz_generic_data_features = None
88 self.wiz_generic_data_text = None
89 self.wiz_generic_data_basic = None
90 self.wiz_generic_data_adv = None
91 self.wiz_generic_data_output = None
92 self.wiz_unsupervised = None
93 self.wiz_reinforcement = None
# Flags derived from the answers; read by the decide_*() methods.
96 self.ml_needed = False
98 self.supervised = False
99 self.unsupervised = False
100 self.reinforcement = False
101 self.data_size = 'high'
102 self.interpretability = False
104 self.ftod_ratio = 'low'
105 self.reproducibility = False
108 ############# All the Wizards ##################################
110 ### GENERIC Wizards - Need for ML ##############################
111 def main_wizard_l1(self):
# Creates the "Data Availability" PromptWizard and keeps it on
# self.wiz_main_l1. The visible step stores a required yes/no answer under
# the id "data_availability", which run_mainwiz() reads.
115 self.wiz_main_l1 = wiz.PromptWizard(
116 name=Bcolors.OKBLUE+"Do you Need ML - Data Availability"+Bcolors.ENDC,
119 # The list of input prompts to ask the user.
121 # ID where the value will be stored
122 id="data_availability",
124 name=Bcolors.HEADER+"Do you have access to data about different situations, or that describes a lot of examples of situations"+Bcolors.ENDC,
127 validators=(wiz.required_validator, wiz.boolean_validator),
133 def gans_wizard(self):
# Creates the synthetic-data (GANs) PromptWizard and keeps it on
# self.wiz_gans. Two required yes/no questions are visible: whether the sample
# data is time-series, and whether it is multi-variate ("gans_data_variables").
# NOTE(review): run_gans_wizard() reads the first answer as 'gans_data_type';
# confirm the first step's id matches that key.
137 self.wiz_gans = wiz.PromptWizard(
138 name=Bcolors.OKBLUE+"Synthetic Data Genration using GANs"+Bcolors.ENDC,
141 # The list of input prompts to ask the user.
143 # ID where the value will be stored
146 name=Bcolors.HEADER+"Is the sample data you have is time-series? Answer Y/N - Yes/No"+Bcolors.ENDC,
149 validators=(wiz.required_validator, wiz.boolean_validator),
153 # ID where the value will be stored
154 id="gans_data_variables",
156 name=Bcolors.HEADER+"Is the sample data you have is multi-variate (more than one features/columns) ? Answer Y/N - Yes/No"+Bcolors.ENDC,
159 validators=(wiz.required_validator, wiz.boolean_validator),
166 def main_wizard_l2_a(self):
# Creates the "Data Creation" PromptWizard and keeps it on
# self.wiz_main_l2_a. The visible step stores a required yes/no answer under
# the id "data_creativity", which run_mainwiz() reads.
170 self.wiz_main_l2_a = wiz.PromptWizard(
171 name=Bcolors.OKBLUE+"Do you Need ML - Data Creation"+Bcolors.ENDC,
174 # The list of input prompts to ask the user.
176 # ID where the value will be stored
177 id="data_creativity",
179 name=Bcolors.HEADER+"Will a system be able to gather a lot of data by trying sequences of actions in many different situations and seeing the results"+Bcolors.ENDC,
182 validators=(wiz.required_validator, wiz.boolean_validator),
188 def main_wizard_l2_b(self):
# Creates the "Data Programmability" PromptWizard and keeps it on
# self.wiz_main_l2_b. Three required yes/no questions are visible: synthetic
# data generation ("data_generation"), labelled data, and whether a program
# or rule set could decide the actions ("data_programmability").
# NOTE(review): `gan` and `label` are presumably the help texts of the first
# two steps; the help= lines are not visible in this listing.
192 gan = """ Synthetic data generation is an important use-case for Telco-scenarios, due to difficulty in getting good dataset."""
193 label = """ One or more meaningful and informative 'tag' to provide context so that a machine learning model can learn from it. For example, labels might indicate whether a photo contains a bird or car, which words were uttered in an audio recording, or if an x-ray contains a tumor. Data labeling is required for a variety of use cases including computer vision, natural language processing, and speech recognition."""
194 self.wiz_main_l2_b = wiz.PromptWizard(
195 name=Bcolors.OKBLUE+"Do you Need ML - Data Programmability"+Bcolors.ENDC,
198 # The list of input prompts to ask the user.
200 # ID where the value will be stored
201 id="data_generation",
# NOTE(review): the prompt below says "Type helfp"; the labelled-data prompt
# further down says "Type help", so this is likely a typo in user-facing text.
203 name=Bcolors.HEADER+" Do you want to generate Synthetic Data from the existing data (Type Y/N - Yes/No). Type helfp for the description"+Bcolors.ENDC,
206 validators=(wiz.required_validator, wiz.boolean_validator),
# NOTE(review): run_mainwiz() reads this answer as 'data_label'; confirm the id.
210 # ID where the value will be stored
213 name=Bcolors.HEADER+" Do you have Labelled data? (Type Y/N - Yes/No). Type help for description of label. "+Bcolors.ENDC,
216 validators=(wiz.required_validator, wiz.boolean_validator),
220 # ID where the value will be stored
221 id="data_programmability",
223 name=Bcolors.HEADER+"Can a program or set of rules decide what actions to take based on the data you have about the situations"+Bcolors.ENDC,
226 validators=(wiz.required_validator, wiz.boolean_validator),
233 def main_wizard_l3(self):
# Creates the "Data Knowledge" PromptWizard and keeps it on self.wiz_main_l3.
# It asks one required yes/no question.
# NOTE(review): run_mainwiz() reads the answer as 'data_knowledge'; the id
# line is not visible in this listing.
237 self.wiz_main_l3 = wiz.PromptWizard(
238 name=Bcolors.OKBLUE+"Do you Need ML - Data Knowledge"+Bcolors.ENDC,
241 # The list of input prompts to ask the user.
243 # ID where the value will be stored
246 name=Bcolors.HEADER+"Could a knowledgeable human decide what actions to take based on the data you have about the situations"+Bcolors.ENDC,
249 validators=(wiz.required_validator, wiz.boolean_validator),
255 def main_wizard_l4(self):
# Creates the "Data Pattern" PromptWizard and keeps it on self.wiz_main_l4.
# It asks one required yes/no question.
# NOTE(review): run_mainwiz() reads the answer as 'data_pattern'; the id line
# is not visible in this listing.
259 self.wiz_main_l4 = wiz.PromptWizard(
260 name=Bcolors.OKBLUE+"Do you Need ML - Data Pattern"+Bcolors.ENDC,
263 # The list of input prompts to ask the user.
265 # ID where the value will be stored
268 name=Bcolors.HEADER+"Could there be patterns in these situations that the humans haven't recognized before"+Bcolors.ENDC,
270 help="Y/N - Yes/No.",
271 validators=(wiz.required_validator, wiz.boolean_validator),
276 ### GENERIC Wizards - GOAL, METRICS, DATA ##############################
277 def gen_choice_wizard(self):
279 Generic Wizard - Goal, metrics, data
# Creates the "Goal and Preferences" PromptWizard and keeps it on
# self.wiz_generic_choice. After the goal question, each yes/no answer tells
# run_generic_wizard() whether to run the matching detail wizard
# (metrics, data type, basic info, advanced info, output info).
281 self.wiz_generic_choice = wiz.PromptWizard(
282 name=Bcolors.OKBLUE+"Understanding Goal, and Preferences"+Bcolors.ENDC,
285 # The list of input prompts to ask the user.
287 # ID where the value will be stored
290 name=Bcolors.HEADER+" What is your goal with the data? Predict, Describe or Explore"+Bcolors.ENDC,
292 help="Enter one of Predict/Describe/Explore",
293 validators=(wiz.required_validator, wiz.choice_validator(['Predict',
302 # ID where the value will be stored
303 id="data_metrics_pref",
305 name=Bcolors.HEADER+" Do you know which metrics (speed, accuracy, etc.) are more important for you? "+Bcolors.ENDC,
308 validators=(wiz.required_validator, wiz.boolean_validator),
# NOTE(review): run_generic_wizard() reads this answer as 'data_main'; the id
# line is not visible in this listing.
312 # ID where the value will be stored
315 name=Bcolors.HEADER+" Do you know about the input data type (If its signal/features/text) ? "+Bcolors.ENDC,
318 validators=(wiz.required_validator, wiz.boolean_validator),
322 # ID where the value will be stored
323 id="data_databasic_pref",
325 name=Bcolors.HEADER+" Do you have basic information (size, count, etc.) about the input data? "+Bcolors.ENDC,
328 validators=(wiz.required_validator, wiz.boolean_validator),
332 # ID where the value will be stored
333 id="data_dataadv_pref",
335 name=Bcolors.HEADER+" Do you have advanced information (distribution, relation, independency, etc.) about the input data? "+Bcolors.ENDC,
338 validators=(wiz.required_validator, wiz.boolean_validator),
342 # ID where the value will be stored
343 id="data_dataoutput_pref",
345 name=Bcolors.HEADER+" Do you have basic information (size, count, etc.) about the output? "+Bcolors.ENDC,
348 validators=(wiz.required_validator, wiz.boolean_validator),
354 def gen_metrics_wizard(self):
356 Generic Wizard - Goal, metrics, data
# Creates the metrics PromptWizard and keeps it on self.wiz_generic_metrics.
# Each visible step asks for an importance rating validated with
# int_validator(1, 5): accuracy, speed, interpretability, reproducibility and
# ease of implementation.
# NOTE(review): perform_inference() reads the speed rating as 'metric_speed';
# that id line is not visible in this listing.
358 self.wiz_generic_metrics = wiz.PromptWizard(
359 name=Bcolors.OKBLUE+"Understanding Goal, Metrics, Data and Output Type"+Bcolors.ENDC,
362 # The list of input prompts to ask the user.
364 # ID where the value will be stored
365 id="metric_accuracy",
367 name=Bcolors.HEADER+" How important the metric 'Accuracy' is for you? 1-5: 1- Least important 5- Most Important"+Bcolors.ENDC,
369 help="Enter 1-5: 1 being least important, and 5 being most important",
370 validators=(wiz.required_validator, wiz.int_validator(1, 5)),
374 # ID where the value will be stored
377 name=Bcolors.HEADER+" How important the metric 'Speed' is for you? 1-5: 1- Least important 5- Most Important"+Bcolors.ENDC,
379 help="Enter 1-5: 1 being least important, and 5 being most important",
380 validators=(wiz.required_validator, wiz.int_validator(1, 5)),
384 # ID where the value will be stored
385 id="metric_interpretability",
387 name=Bcolors.HEADER+" How important the metric 'Interpretability' is for you? 1-5: 1- Least important 5- Most Important"+Bcolors.ENDC,
389 help="Enter 1-5: 1 being least important, and 5 being most important",
390 validators=(wiz.required_validator, wiz.int_validator(1, 5)),
394 # ID where the value will be stored
395 id="metric_reproducibility",
397 name=Bcolors.HEADER+" How important the metric 'Reproducibility' is for you? 1-5: 1- Least important 5- Most Important"+Bcolors.ENDC,
399 help="Enter 1-5: 1 being least important, and 5 being most important",
400 validators=(wiz.required_validator, wiz.int_validator(1, 5)),
404 # ID where the value will be stored
405 id="metric_implementation",
407 name=Bcolors.HEADER+" How important the metric 'Ease of Implementation and Maintenance' is for you? 1-5: 1- Least important 5- Most Important"+Bcolors.ENDC,
409 help="Enter 1-5: 1 being least important, and 5 being most important",
410 validators=(wiz.required_validator, wiz.int_validator(1, 5)),
416 def gen_data_main_wizard(self):
418 Generic Wizard - Goal, metrics, data
# Creates the input-data-kind PromptWizard and keeps it on
# self.wiz_generic_data_main. The answer is a number 1-4 (features, signals,
# text, none of the above) as listed in the help text.
# NOTE(review): run_generic_wizard() reads the answer as 'data_column'; the id
# line is not visible in this listing.
420 self.wiz_generic_data_main = wiz.PromptWizard(
421 name=Bcolors.OKBLUE+"Understanding Goal, and Preferences"+Bcolors.ENDC,
424 # The list of input prompts to ask the user.
426 # ID where the value will be stored
429 name=Bcolors.HEADER+" What does the data (columns) represent? Please type help and select the associated number"+Bcolors.ENDC,
431 help="1. Well Defined Features\n 2. Signals - Timeseries, pixels, etc\n 3. Text - Unstructured\n 4. None of the above\n",
432 validators=(wiz.required_validator, wiz.int_validator(1, 4)),
438 def gen_data_signal_wizard(self):
440 Generic Wizard - Goal, metrics, data
# Creates the signal-type PromptWizard and keeps it on
# self.wiz_generic_data_signal. The answer, stored under "data_signal_type",
# is a number 1-5 (image, audio, timeseries, none, not applicable) as listed
# in the help text.
442 self.wiz_generic_data_signal = wiz.PromptWizard(
443 name=Bcolors.OKBLUE+"Understanding Goal, and Preferences"+Bcolors.ENDC,
446 # The list of input prompts to ask the user.
448 # ID where the value will be stored
449 id="data_signal_type",
451 name=Bcolors.HEADER+" If Signals, can you choose any one from the below list? Please type help for list "+Bcolors.ENDC,
453 help="1. Image\n 2. Audio\n 3. Timeseries\n 4. None of the above\n 5. Not Applicable\n ",
454 validators=(wiz.required_validator, wiz.int_validator(1, 5)),
460 def gen_data_features_wizard(self):
462 Generic Wizard - Goal, metrics, data
# Creates the features PromptWizard and keeps it on
# self.wiz_generic_data_features. It asks whether the features are well
# defined (yes/no) and how many there are ("data_features_count", an integer
# between 1 and 100000).
464 self.wiz_generic_data_features = wiz.PromptWizard(
465 name=Bcolors.OKBLUE+"Understanding Goal, and Preferences"+Bcolors.ENDC,
468 # The list of input prompts to ask the user.
470 # ID where the value will be stored
473 name=Bcolors.HEADER+" If features, are they well defined? i.e., are all the variables well understood? "+Bcolors.ENDC,
476 validators=(wiz.required_validator, wiz.boolean_validator),
480 # ID where the value will be stored
481 id="data_features_count",
483 name=Bcolors.HEADER+" If features, How many are there? "+Bcolors.ENDC,
485 help="Number only - Approximate should be OK.",
486 validators=(wiz.required_validator, wiz.int_validator(1, 100000)),
492 def gen_data_text_wizard(self):
494 Generic Wizard - Goal, metrics, data
# Creates the text-type PromptWizard and keeps it on
# self.wiz_generic_data_text. The answer is a number 1-8 chosen from the list
# in the help text.
# NOTE(review): run_generic_wizard() uses the key 'data_text_type' for the
# default answer; the id line is not visible in this listing.
496 self.wiz_generic_data_text = wiz.PromptWizard(
497 name=Bcolors.OKBLUE+"Understanding Goal, and Preferences"+Bcolors.ENDC,
500 # The list of input prompts to ask the user.
502 # ID where the value will be stored
505 name=Bcolors.HEADER+" If Text, can you choose any one from the below list? Please type help for list"+Bcolors.ENDC,
507 help="1. Webpages\n 2. Emails\n 3. Social-Media Posts\n 4. Books\n 5. Formal Articles\n 6. Speech converted to text\n 7. None of the above\n 8. Not Applicable\n ",
508 validators=(wiz.required_validator, wiz.int_validator(1, 8)),
515 def gen_about_data_basic_wizard(self):
517 Generic Wizard - Goal, metrics, data
# Creates the basic-input-data PromptWizard and keeps it on
# self.wiz_generic_data_basic. It asks about missing values (yes/no), the
# size in bytes ("data_size_bytes") and the size in samples
# ("data_size_samples"); both sizes are free text with a unit letter.
519 self.wiz_generic_data_basic = wiz.PromptWizard(
520 name=Bcolors.OKBLUE+"Understanding Basic Input Data Information"+Bcolors.ENDC,
523 # The list of input prompts to ask the user.
525 # ID where the value will be stored
528 name=Bcolors.HEADER+" Are there any missing values in the data? "+Bcolors.ENDC,
531 validators=(wiz.required_validator, wiz.boolean_validator),
535 # ID where the value will be stored
536 id="data_size_bytes",
538 name=Bcolors.HEADER+" How big is the data in terms of size? (Use K/M/G Bytes unit) "+Bcolors.ENDC,
540 help="Number(integer) and unit: K for Kilo, M for Mega and G for Giga. Ex: 10G for 10 Giga bytes",
# NOTE(review): "(wiz.required_validator)" has no trailing comma, so it is the
# bare function rather than a one-element tuple like the other steps pass.
# Nothing here checks the "<number><unit>" format either.
541 validators=(wiz.required_validator),
545 # ID where the value will be stored
546 id="data_size_samples",
548 name=Bcolors.HEADER+" How big is the data in terms of samples? (Use T/M/B Samples) "+Bcolors.ENDC,
550 help="Number(integer) and unit: T for Thousand, M for Million and B for Billion. Ex: 1M for 1 Million Samples",
# NOTE(review): same non-tuple validators value as the bytes step above.
551 validators=(wiz.required_validator),
557 def gen_about_data_advanced_wizard(self):
559 Generic Wizard - Goal, metrics, data
# Creates the advanced-input-data PromptWizard and keeps it on
# self.wiz_generic_data_adv. Four required yes/no answers are collected:
# "data_distribution", "data_io_relation", "data_correlation" and
# "data_cond_indep".
561 self.wiz_generic_data_adv = wiz.PromptWizard(
562 name=Bcolors.OKBLUE+"Understanding Advanced Input Data Information"+Bcolors.ENDC,
565 # The list of input prompts to ask the user.
567 # ID where the value will be stored
568 id="data_distribution",
570 name=Bcolors.HEADER+" Are you aware of any 'Distribution' that is inherent to the data, we can take advantage of?"+Bcolors.ENDC,
573 validators=(wiz.required_validator, wiz.boolean_validator),
577 # ID where the value will be stored
578 id="data_io_relation",
580 name=Bcolors.HEADER+" Is the probability of 'Linear Relation' between input and the output is high?"+Bcolors.ENDC,
583 validators=(wiz.required_validator, wiz.boolean_validator),
587 # ID where the value will be stored
588 id="data_correlation",
# NOTE(review): the prompt ends with "in your day"; presumably "in your data".
590 name=Bcolors.HEADER+" Are you confident that there is NO high correlation among the independent variables in your day?"+Bcolors.ENDC,
592 help="Y/N/ - Yes/No ",
593 validators=(wiz.required_validator, wiz.boolean_validator),
597 # ID where the value will be stored
598 id="data_cond_indep",
600 name=Bcolors.HEADER+" Are you confident that the variables are conditionally independent?"+Bcolors.ENDC,
# NOTE(review): "lightining" in the help text below is a misspelling of "lightning".
602 help="Y/N/. If probability that it rains given lightining and thunder is same as probability that it rains given lightining, then rain and thunder are conditionally independent",
603 validators=(wiz.required_validator, wiz.boolean_validator),
609 def gen_about_output_wizard(self):
611 Generic Wizard - Goal, metrics, data
# Creates the output-data PromptWizard and keeps it on
# self.wiz_generic_data_output. "data_type_output" is a number 1-5 chosen from
# the list in the help text; "data_output_prob" is a required yes/no answer.
613 self.wiz_generic_data_output = wiz.PromptWizard(
614 name=Bcolors.OKBLUE+"Understanding Data Output"+Bcolors.ENDC,
617 # The list of input prompts to ask the user.
619 # ID where the value will be stored
620 id="data_type_output",
622 name=Bcolors.HEADER+" What is the expected output data type ? (Please type number associated with type in 'help') "+Bcolors.ENDC,
624 help=" 1:Numerical-Discrete\n 2:Numerical-Continuous\n 3:Ordinal\n 4:Categorical-Binary\n 5:Categorical-Multiclass",
625 validators=(wiz.required_validator, wiz.int_validator(1, 5)),
629 # ID where the value will be stored
630 id="data_output_prob",
632 name=Bcolors.HEADER+" Is the expected output data a probability value ? "+Bcolors.ENDC,
635 validators=(wiz.required_validator, wiz.boolean_validator),
642 def unsupervised_wizard(self):
644 The Un-Supervized Learning Wizard
# Creates the unsupervised-learning PromptWizard and keeps it on
# self.wiz_unsupervised. The first answer is a goal number 1-3 (clustering,
# dimensionality reduction, others); the remaining answers are restricted by
# choice_validator to Y / N / NA in the listed letter-case variants.
# NOTE(review): decide_unsupervised() reads 'unsup_goal' and 'unsup_clus_dv';
# those id lines are not visible in this listing.
# NOTE(review): the prompts say "type NA is you are not sure"; presumably
# "if you are not sure".
646 self.wiz_unsupervised = wiz.PromptWizard(
647 name=Bcolors.OKBLUE+"Understanding Goal, Metrics, Data and Output Type"+Bcolors.ENDC,
650 # The list of input prompts to ask the user.
652 # ID where the value will be stored
655 name=Bcolors.HEADER+" What is the main goal? (Please type number associated with type in 'help')"+Bcolors.ENDC,
657 help="1: Explore Similar Groups (clustering) \n 2: Perform Dimensionality Reduction\n 3: Others\n",
658 validators=(wiz.required_validator, wiz.int_validator(1, 3)),
662 # ID where the value will be stored
663 id="unsup_dr_topic_mod",
665 name=Bcolors.HEADER+" If dimensionality reduction, do you prefer topic modelling ? (Please type NA is you are not sure)"+Bcolors.ENDC,
668 validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na',
669 'y','n','na','nA'])),
673 # ID where the value will be stored
676 name=Bcolors.HEADER+" Are you aware of density variations in your data ? (Please type NA is you are not sure)"+Bcolors.ENDC,
679 validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na',
680 'y','n','na','nA'])),
684 # ID where the value will be stored
685 id="unsup_clus_outliers",
687 name=Bcolors.HEADER+" Are there too many outliers in your data ? (Please type NA is you are not sure)"+Bcolors.ENDC,
690 validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na',
691 'y','n','na','nA'])),
695 # ID where the value will be stored
696 id="unsup_clus_groups",
698 name=Bcolors.HEADER+" If clustering, do you know how many groups to form? (Please type NA is you are not sure)"+Bcolors.ENDC,
701 validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na',
702 'y','n','na','nA'])),
709 def reinforcement_wizard(self):
711 The Reinforced Learning Wizard
715 |-------| Agent | Action
721 | |----|Environment| |
# Creates the reinforcement-learning PromptWizard and keeps it on
# self.wiz_reinforcement. After an informational first step, four Y / N / NA
# answers are collected ("ri_model_preference", "ri_model_availability",
# "ri_modelfree_value", "ri_modelfree_value_state"), then an application
# domain number 1-8.
725 self.wiz_reinforcement = wiz.PromptWizard(
726 name=Bcolors.OKBLUE+"Reinforcement Specific"+Bcolors.ENDC,
729 # The list of input prompts to ask the user.
731 # ID where the value will be stored
734 name=Bcolors.HEADER+" Type help for reference diagram for reinforcement-learning"+Bcolors.ENDC,
# NOTE(review): "(wiz.required_validator)" has no trailing comma, so it is the
# bare function rather than a one-element tuple like the other steps pass.
737 validators=(wiz.required_validator),
738 default='Type Help or Press Enter'
741 # ID where the value will be stored
742 id="ri_model_preference",
744 name=Bcolors.HEADER+" Do you prefer model-based approach? (Type NA if you are not sure) "+Bcolors.ENDC,
747 validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na',
748 'y','n','na','nA'])),
752 # ID where the value will be stored
753 id="ri_model_availability",
755 name=Bcolors.HEADER+" Do you have a model for model-based approach? (Type NA if not applicable) "+Bcolors.ENDC,
758 validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na',
759 'y','n','na','nA'])),
763 # ID where the value will be stored
764 id="ri_modelfree_value",
766 name=Bcolors.HEADER+" In Model-Free approach, do you prefer value-based approach? (Type NA if not applicable) "+Bcolors.ENDC,
769 validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na',
770 'y','n','na','nA'])),
774 # ID where the value will be stored
775 id="ri_modelfree_value_state",
777 name=Bcolors.HEADER+" In Model-Free Value-Based approach, do you prefer state-only model? (Type NA if not applicable) "+Bcolors.ENDC,
780 validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na',
781 'y','n','na','nA'])),
785 # ID where the value will be stored
788 name=Bcolors.HEADER+" What is the application domain ? (Please type number associated with type in 'help') "+Bcolors.ENDC,
# NOTE(review): "Reccommenders" in the help text below is a misspelling of "Recommenders".
790 help=" 1:Computer Resource Mgmt.\n 2:Robotics\n 3:Traffic-Control\n 4:Reccommenders\n 5:Autonomous Vehicles\n 6:Games\n 7:Chemistry\n 8:Others\n",
791 validators=(wiz.required_validator, wiz.int_validator(1, 8)),
797 ############### All the Run Operations ######################
798 def run_mainwiz(self):
# Runs the "do you need ML" question chain and records the outcome in
# self.ml_needed plus one of self.supervised / self.unsupervised /
# self.reinforcement. Each wizard is built right before it is run, and its
# answers are stored in the matching self.main_*_values attribute.
# NOTE(review): the else lines and the indentation are not visible in this
# listing, so the exact nesting of the branches below must be checked against
# the real file.
802 self.main_wizard_l1()
803 self.main_l1_values = self.wiz_main_l1.run(self.shell)
804 if self.main_l1_values['data_availability']:
# Data is available: ask about labels, synthetic data and programmability.
806 self.main_wizard_l2_b()
807 self.main_l2b_values = self.wiz_main_l2_b.run(self.shell)
808 if self.main_l2b_values['data_label']:
809 self.supervised = True
811 self.unsupervised = True
812 if self.main_l2b_values['data_programmability']:
813 print(Bcolors.FAIL+"ML is not required - Please consider alternate approaches\n"+Bcolors.ENDC)
814 elif self.main_l2b_values['data_generation']:
815 print(Bcolors.OKGREEN+"Looks like you need ML, let's continue"+Bcolors.ENDC)
816 self.ml_needed = True
# Next level: could a knowledgeable human decide the actions?
819 self.main_wizard_l3()
820 self.main_l3_values = self.wiz_main_l3.run(self.shell)
821 if self.main_l3_values['data_knowledge']:
822 print(Bcolors.OKGREEN+"Looks like you need ML, let's continue"+Bcolors.ENDC)
823 self.ml_needed = True
# Last level: could there be patterns humans have not recognised?
825 self.main_wizard_l4()
826 self.main_l4_values = self.wiz_main_l4.run(self.shell)
827 if self.main_l4_values['data_pattern']:
828 print(Bcolors.OKGREEN+"Looks like you need ML, let's continue"+Bcolors.ENDC)
829 self.ml_needed = True
831 print(Bcolors.FAIL+"ML is not required - Please consider alternate approaches\n"+Bcolors.ENDC)
# A "yes" to data_creativity is what selects reinforcement learning.
833 self.main_wizard_l2_a()
834 self.main_l2a_values = self.wiz_main_l2_a.run(self.shell)
835 if self.main_l2a_values['data_creativity']:
836 print(Bcolors.OKGREEN+"Looks like you need ML, let's continue"+Bcolors.ENDC)
837 self.ml_needed = True
838 self.reinforcement = True
840 print(Bcolors.FAIL+"ML is not required - Please consider alternate approaches\n"+Bcolors.ENDC)
def run_gans_wizard(self):
    """
    Run the GANs wizard and print the GAN technique to consider.

    The answers are kept in ``self.gans_values``. Time-series data that is
    multi-variate maps to TTS-GAN, time-series data that is not multi-variate
    maps to TimeGAN, and anything else gets the "please connect" message.
    """
    # self.wiz_gans only exists once gans_wizard() has built it.
    self.gans_wizard()
    self.gans_values = self.wiz_gans.run(self.shell)

    # Guard clause: only time-series data has a recommendation.
    if not self.gans_values['gans_data_type']:
        print("Sorry. We need to discuss, please connect with Anuket Thoth Project <srao@linuxfoundation.org>")
        return

    if self.gans_values['gans_data_variables']:
        print("GANs technique to consider: TTS-GAN")
    else:
        print("GANs technique to consider: TimeGAN")
def run_generic_wizard(self):
    """
    Run the generic wizards (goal, metrics, data and output).

    The preference wizard runs first; each of its yes/no answers decides
    whether the matching detail wizard is run or a built-in default answer
    is stored instead. Results land in the ``self.gen_*_values`` attributes.
    """
    # Defaults used whenever the user cannot answer a detail wizard.
    default_text = {'data_text_type': '3'}
    default_features = {'data_features': 'Y',
                        'data_features_count': '10'}
    default_signal = {'data_signal_type': '1'}

    self.gen_choice_wizard()
    self.gen_choice_values = self.wiz_generic_choice.run(self.shell)
    prefs = self.gen_choice_values

    if prefs['data_metrics_pref']:
        self.gen_metrics_wizard()
        self.gen_metrics_values = self.wiz_generic_metrics.run(self.shell)

    if prefs['data_main']:
        self.gen_data_main_wizard()
        self.gen_data_main_values = self.wiz_generic_data_main.run(self.shell)
        # 1 = features, 2 = signals, 3 = text (see the wizard's help text).
        column = int(self.gen_data_main_values['data_column'])
        if column == 3:
            self.gen_data_text_wizard()
            self.gen_data_text_values = self.wiz_generic_data_text.run(self.shell)
        else:
            self.gen_data_text_values = default_text
        if column == 1:
            self.gen_data_features_wizard()
            self.gen_data_features_values = self.wiz_generic_data_features.run(self.shell)
        else:
            self.gen_data_features_values = default_features
        if column == 2:
            self.gen_data_signal_wizard()
            self.gen_data_signal_values = self.wiz_generic_data_signal.run(self.shell)
        else:
            self.gen_data_signal_values = default_signal
    else:
        self.gen_data_main_values = {'data_column': '1'}
        # Also store the per-kind defaults: later steps read keys such as
        # 'data_features_count' and would otherwise fail on an empty answer set.
        self.gen_data_text_values = default_text
        self.gen_data_features_values = default_features
        self.gen_data_signal_values = default_signal
        print("Unknown Data Type")

    if prefs['data_databasic_pref']:
        self.gen_about_data_basic_wizard()
        self.gen_about_data_basic_values = self.wiz_generic_data_basic.run(self.shell)
    else:
        self.gen_about_data_basic_values = {'data_missing': 'N',
                                            'data_size_bytes': '1G',
                                            'data_size_samples': '1M'}

    if prefs['data_dataadv_pref']:
        self.gen_about_data_advanced_wizard()
        self.gen_about_data_adv_values = self.wiz_generic_data_adv.run(self.shell)
    else:
        self.gen_about_data_adv_values = {'data_distribution': 'N',
                                          'data_io_relation': 'N',
                                          'data_correlation': 'N',
                                          'data_cond_indep': 'N'}

    if prefs['data_dataoutput_pref']:
        self.gen_about_output_wizard()
        self.gen_about_data_output_values = self.wiz_generic_data_output.run(self.shell)
    else:
        self.gen_about_data_output_values = {'data_type_output': '1',
                                             'data_output_prob': 'N'}
def run_unsupervised_wizard(self):
    """
    Run UnSupervised Learning Wizard.

    Builds the wizard, runs it against ``self.shell`` and stores the
    answers in ``self.unsup_values``.
    """
    self.unsupervised_wizard()
    answers = self.wiz_unsupervised.run(self.shell)
    self.unsup_values = answers
def run_reinforcement_wizard(self):
    """
    Run Reinforced Learning Wizard.

    Builds the wizard, runs it against ``self.shell`` and stores the
    answers in ``self.ri_values``.
    """
    self.reinforcement_wizard()
    answers = self.wiz_reinforcement.run(self.shell)
    self.ri_values = answers
927 def decide_unsupervised(self):
929 Decide which Unsupervised-learning to use
# Prints the unsupervised model(s) to consider, based on self.unsup_values,
# self.data_size, self.reproducibility and the 'data_output_prob' answer.
# Goal 1 is clustering, goal 2 is dimensionality reduction, anything else
# falls through to the "Sorry" message.
# NOTE(review): several print/else lines and all indentation are missing from
# this listing, so the branch nesting cannot be confirmed from here.
# NOTE(review): the yes/no tests are substring checks ('y' in value.lower());
# confirm every value read here is a string.
933 if int(self.unsup_values['unsup_goal']) == 1:
935 if 'high' in self.data_size:
936 if not self.reproducibility:
941 if 'y' in self.unsup_values['unsup_clus_dv'].lower():
942 if 'y' in self.unsup_values['unsup_clus_groups'].lower():
945 print("Unsupervised Learning model to consider: Hierarchical Clustering")
950 if 'y' in self.unsup_values['unsup_clus_outliers'].lower():
951 print("Unsupervised Learning model to consider: Hierarchical Clustering")
953 print("Unsupervised Learning model to consider: DBSCAN")
956 if 'y' in self.gen_about_data_output_values['data_output_prob'].lower():
957 print("Unsupervised Learning model to consider: Gaussian Mixture")
959 print("Unsupervised Learning model to consider: KMeans")
961 elif int(self.unsup_values['unsup_goal']) == 2:
962 # Dimensionality Reduction
963 if 'y' in self.unsup_values['unsup_dr_topic_mod'].lower():
964 if 'y' in self.gen_about_data_output_values['data_output_prob'].lower():
965 print("Unsupervised Learning model to consider: SVD")
967 print("Unsupervised Learning model to consider: LDA")
969 print("Unsupervised Learning model to consider: PCA")
971 print("Sorry. We need to discuss, please connect with Anuket Thoth Project <srao@linuxfoundation.org>")
def decide_reinforcement(self):
    """
    Decide which reinforcement learning to use.

    Reads the reinforcement answers in ``self.ri_values`` and the expected
    output type in ``self.gen_about_data_output_values`` and prints the
    model family to consider:

    * output type 2 (continuous) or a "yes" to the model-based question:
      model-based suggestions, split on whether a model is available;
    * an explicit "no": model-free suggestions, split on value-based and
      state-only preferences;
    * anything else (for example "NA"): the "please connect" message.
    """
    def answer(values, key, default):
        # Answers may be missing when the matching wizard was never run.
        try:
            return values[key]
        except (KeyError, AttributeError, TypeError):
            return default

    def says_yes(key):
        return 'y' in str(answer(self.ri_values, key, '')).lower()

    preference = str(answer(self.ri_values, 'ri_model_preference', '')).strip().lower()
    output_type = int(answer(self.gen_about_data_output_values, 'data_type_output', 0))

    if output_type == 2 or preference in ('y', 'yes'):
        # Model based approach.
        if says_yes('ri_model_availability'):
            print("Reinforcement Learning model to consider - AlphaZero")
        else:
            print("Reinforcement Learning models to consider - World Models, I2A, MBMF, and MBVE")
    elif preference in ('n', 'no'):
        # Model-Free based approach. Exact match on purpose: a substring test
        # for 'n' would also accept "NA" and make the last branch unreachable.
        if not says_yes('ri_modelfree_value'):
            print("Reinforcement Learning models to consider: Policy Gradient and Actor Critic")
        elif says_yes('ri_modelfree_value_state'):
            print("Reinforcement Learning models to consider - Monte Carlo, TD(0), and TD(Lambda)")
        else:
            print("Reinforcement Learning models to consider - SARSA, QLearning, Deep Queue Nets")
    else:
        print("Sorry. We need to discuss, please connect with Anuket Thoth Project <srao@linuxfoundation.org>")
def perform_inference(self):
    """
    Perform Inferences. Used across all 3 types.

    Derives the decision flags from the collected answers:

    * ``self.data_size``: 'low' for Kilo-byte or Thousand-sample data,
      'high' for Mega/Giga bytes or Million/Billion samples, otherwise
      'unknown';
    * ``self.interpretability``, ``self.speed``, ``self.reproducibility``:
      set to True when the matching metric is rated 3 or more;
    * ``self.ftod_ratio``: set to 'high' when the feature count exceeds the
      limit for the data size (50 / 5000 / 500000).

    Unit letters are matched case-insensitively, and answers that were never
    collected are treated as "not important" / zero features instead of
    raising.
    """
    def answer(values, key, default):
        # A skipped wizard leaves an empty (or missing) answer set behind.
        try:
            return values[key]
        except (KeyError, AttributeError, TypeError):
            return default

    basic = self.gen_about_data_basic_values
    size_bytes = str(answer(basic, 'data_size_bytes', '')).lower()
    size_samples = str(answer(basic, 'data_size_samples', '')).lower()
    small = 'k' in size_bytes or 't' in size_samples
    medium = 'm' in size_bytes or 'm' in size_samples
    large = 'g' in size_bytes or 'b' in size_samples

    # Decide whether data is Low or High
    if small:
        self.data_size = 'low'
    elif medium or large:
        self.data_size = 'high'
    else:
        self.data_size = 'unknown'

    # A metric counts as important from a rating of 3 upwards.
    def important(metric):
        return int(answer(self.gen_metrics_values, metric, 0)) >= 3

    if important('metric_interpretability'):
        self.interpretability = True
    if important('metric_speed'):
        # NOTE(review): the original statement under this test is not visible
        # in the listing; setting self.speed mirrors the neighbouring flags.
        self.speed = True
    if important('metric_reproducibility'):
        self.reproducibility = True

    # Decide Features relative to Data (ftod_ratio) - high/low
    if small:
        feature_limit = 50
    elif medium:
        feature_limit = 5000
    else:
        feature_limit = 500000
    feature_count = int(answer(self.gen_data_features_values,
                               'data_features_count', 0))
    if feature_count > feature_limit:
        self.ftod_ratio = 'high'
1028 def decide_supervised(self):
1030 Decide which Supervised learning to use.
# Prints the supervised model(s) to consider. The choice is driven by
# self.data_size ('high' / 'low'), self.interpretability, self.ftod_ratio, the
# input data kind ('data_column', 'data_signal_type') and the output and
# advanced-data answers.
# NOTE(review): else lines, some conditions and all indentation are missing
# from this listing, so the branch nesting cannot be confirmed from here.
# NOTE(review): the yes/no tests are substring checks ('y' in value.lower());
# confirm every value read here is a string.
1032 if 'high' in self.data_size:
1033 # Cover: DT, RF, RNN, CNN, ANN and Naive Bayes
1034 if self.interpretability:
1036 print("Supervised Learning model to consider - Decision Tree")
1038 print("Supervised Learning model to consider - Random Forest")
1040 if int(self.gen_data_main_values['data_column']) == 3:
1041 print("Supervised Learning model to consider - RNN")
1042 elif (int(self.gen_data_main_values['data_column']) == 2 and
1043 int(self.gen_data_signal_values['data_signal_type']) == 1):
1044 print("Supervised Learning model to consider - CNN")
1045 elif (int(self.gen_data_main_values['data_column']) == 2 and
1046 (int(self.gen_data_signal_values['data_signal_type']) == 2 or
1047 int(self.gen_data_signal_values['data_signal_type']) == 3)):
1048 if 'y' in self.gen_about_data_output_values['data_output_prob'].lower():
1049 print("Supervised Learning model to consider - Naive Bayes")
1051 print("Supervised Learning model to consider - ANN")
# NOTE(review): the message below reads "Supervised model to consider
# Learning"; the word order differs from every other message.
1053 print("Supervised model to consider Learning - ANN")
1054 elif 'low' in self.data_size:
1056 # Cover: Regressions
1057 if 'high' in self.ftod_ratio:
# NOTE(review): "SVN" below is presumably "SVM"; the later messages say SVM.
1060 print("Supervised Learning model to consider - SVN with Gaussian Kernel")
1062 if int(self.gen_about_data_output_values['data_type_output']) != 2:
1065 if 'y' in self.gen_about_data_adv_values['data_io_relation'].lower():
1066 print("Supervised Learning model to consider - Linear Regression or Linear SVM")
1068 print("Supervised Learning model to consider - Polynomial Regression or nonLinear SVM")
# NOTE(review): the key below is 'data_output_type', but the output wizard's id,
# the default answers and the other reads all use 'data_type_output', so this
# lookup likely raises KeyError.
1071 if int(self.gen_about_data_output_values['data_output_type']) == 4:
1072 if 'y' in self.gen_about_data_output_values['data_output_prob'].lower():
1073 if 'y' in self.gen_about_data_adv_values['data_cond_indep'].lower():
1074 print("Supervised Learning model to consider - Naive Bayes")
1076 if 'y' in self.gen_about_data_adv_values['data_correlation'].lower():
1077 print("Supervised Learning model to consider - LASSO or Ridge Regression")
1079 print("Supervised Learning model to consider - Logistic Regression")
1081 print("Supervised Learning model to consider - Polynomial Regression or nonLinear SVM")
1084 print("Supervised Learning model to consider - KNN")
1087 print("Sorry. We need to discuss, please connect with Anuket Thoth Project <srao@linuxfoundation.org>")
1089 def ask_and_decide(self):
# Top-level flow: runs the wizards that apply and then calls the matching
# decide_*() method for the supervised, unsupervised or reinforcement case.
# NOTE(review): the conditions that select the GANs path and the supervised
# path are not visible in this listing; confirm them against the real file.
1095 self.run_gans_wizard()
1098 self.run_generic_wizard()
1100 self.decide_supervised()
1101 elif self.unsupervised:
1102 self.run_unsupervised_wizard()
1103 self.decide_unsupervised()
1104 elif self.reinforcement:
1105 self.run_reinforcement_wizard()
1106 self.decide_reinforcement()
1109 def signal_handler(signum, frame):
# Signal callback (registered for SIGINT at the bottom of the file): tells the
# user that no suggestion will be given, then prints the signal number and
# the frame it received.
1113 print("\n You interrupted, No Suggestion will be provided!")
1114 print(signum, frame)
# Entry-point body: create the wizard and run the whole question/decision
# flow. KeyboardInterrupt and MemoryError are reported with a short message.
# NOTE(review): the enclosing def/try lines are not visible in this listing.
1122 algowiz = AlgoSelectorWizard()
1123 algowiz.ask_and_decide()
1124 except(KeyboardInterrupt, MemoryError):
1125 print("Some Error Occured - No Suggestion can be provided")
1127 print("Thanks for using the Algoselector-Wizard, " +
1128 "Hope our suggestion will be useful")
1130 if __name__ == "__main__":
# NOTE(review): `signal` is used here but no `import signal` is visible in this
# listing; confirm the import exists in the real file.
1131 signal.signal(signal.SIGINT, signal_handler)