From: Sridhar K. N. Rao Date: Tue, 3 May 2022 00:28:12 +0000 (+0530) Subject: [TOOL] Model Selector update. X-Git-Url: https://gerrit.opnfv.org/gerrit/gitweb?a=commitdiff_plain;h=77c3e230fb707d56044234a6e928760c96c95e54;p=thoth.git [TOOL] Model Selector update. This patch adds stability updates to Model Selector Signed-off-by: Sridhar K. N. Rao Change-Id: I74b4e9a1b1837d85dac7f0d091019a23b1abd7b5 --- diff --git a/tools/modelselector/modelselector.py b/tools/modelselector/modelselector.py index 90b289c..a3a0b0b 100644 --- a/tools/modelselector/modelselector.py +++ b/tools/modelselector/modelselector.py @@ -1,5 +1,5 @@ -# Copyright 2021 Spirent Communications. -# sridhar.rao@spirent.com +# Copyright 2022 Linux Foundation. +# srao@linuxfoundation.org # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -18,10 +18,8 @@ Tool to suggest which ML approach is more applicable for a particular data and usecase. TODO: 1. Minimize code. -a. Reduce returns. -b. Optimize loops. - 2. Add Informative data to the user. +3. Check for Size Entry - 1G/K .. """ from __future__ import print_function @@ -54,6 +52,7 @@ class AlgoSelectorWizard(): Perform Initialization. """ self.shell = Shell() + # Set of all values from the user self.main_values = {} self.main_l1_values = {} self.main_l2a_values = {} @@ -63,6 +62,16 @@ class AlgoSelectorWizard(): self.unsup_values = {} self.ri_values = {} self.gen_values = {} + self.gen_choice_values = {} + self.gen_metrics_values = {} + self.gen_data_main_values = {} + self.gen_data_text_values = {} + self.gen_data_features_values = {} + self.gen_data_signal_values = {} + self.gen_about_data_basic_values = {} + self.gen_about_data_adv_values = {} + self.gen_about_data_output_values = {} + # Set of Wizards. self.wiz_main = None self.wiz_main_l1 = None self.wiz_main_l2_a = None @@ -70,8 +79,18 @@ class AlgoSelectorWizard(): self.wiz_main_l3 = None self.wiz_main_l4 = None self.wiz_generic = None + self.wiz_generic_choice = None + self.wiz_geneirc_metric = None + self.wiz_generic_data_main = None + self.wiz_generic_data_signal = None + self.wiz_generic_data_features = None + self.wiz_generic_data_text = None + self.wiz_generic_data_basic = None + self.wiz_generic_data_adv = None + self.wiz_generic_data_output = None self.wiz_unsupervised = None self.wiz_reinforcement = None + # Some Inferences self.ml_needed = False self.supervised = False self.unsupervised = False @@ -101,8 +120,8 @@ class AlgoSelectorWizard(): # Display name name=Bcolors.HEADER+"Do you have access to data about different situations, or that describes a lot of examples of situations"+Bcolors.ENDC, # Help message - help="Y/N/U - Yes/No/Unknown", - validators=(wiz.required_validator), + help="Y/N - Yes/No", + validators=(wiz.required_validator, wiz.boolean_validator), default='Y', ), ) @@ -123,8 +142,8 @@ class AlgoSelectorWizard(): # Display name name=Bcolors.HEADER+"Will a system be able to gather a lot of data by trying sequences of actions in many different situations and seeing the results"+Bcolors.ENDC, # Help message - help="Y/N/U - Yes/No/Unknown", - validators=(wiz.required_validator), + help="Y/N - Yes/No", + validators=(wiz.required_validator, wiz.boolean_validator), default='Y', ), ) @@ -144,10 +163,10 @@ class AlgoSelectorWizard(): # ID where the value will be stored id="data_label", # Display name - name=Bcolors.HEADER+" Do you have Labelled data? (Type Y/N/U - Yes/No/Unknown). Type help for description of label. "+Bcolors.ENDC, + name=Bcolors.HEADER+" Do you have Labelled data? (Type Y/N - Yes/No). Type help for description of label. "+Bcolors.ENDC, # Help message help=label, - validators=(wiz.required_validator), + validators=(wiz.required_validator, wiz.boolean_validator), default='Y', ), wiz.WizardStep( @@ -156,9 +175,9 @@ class AlgoSelectorWizard(): # Display name name=Bcolors.HEADER+"Can a program or set of rules decide what actions to take based on the data you have about the situations"+Bcolors.ENDC, # Help message - help="Y/N/U - Yes/No/Unknown", - validators=(wiz.required_validator), - default='Y', + help="Y/N - Yes/No", + validators=(wiz.required_validator, wiz.boolean_validator), + default='N', ), ) ) @@ -179,8 +198,8 @@ class AlgoSelectorWizard(): # Display name name=Bcolors.HEADER+"Could a knowledgeable human decide what actions to take based on the data you have about the situations"+Bcolors.ENDC, # Help message - help="Y/N/U - Yes/No/Unknown", - validators=(wiz.required_validator), + help="Y/N - Yes/No", + validators=(wiz.required_validator, wiz.boolean_validator), default='Y', ), ) @@ -201,19 +220,19 @@ class AlgoSelectorWizard(): # Display name name=Bcolors.HEADER+"Could there be patterns in these situations that the humans haven't recognized before"+Bcolors.ENDC, # Help message - help="Y/N/U - Yes/No/Unknown", - validators=(wiz.required_validator), + help="Y/N - Yes/No.", + validators=(wiz.required_validator, wiz.boolean_validator), default='Y' ), ) ) ### GENERIC Wizards - GOAL, METRICS, DATA ############################## - def gen_wizard(self): + def gen_choice_wizard(self): """ Generic Wizard - Goal, metrics, data """ - self.wiz_generic = wiz.PromptWizard( - name=Bcolors.OKBLUE+"Understanding Goal, Metrics, Data and Output Type"+Bcolors.ENDC, + self.wiz_generic_choice = wiz.PromptWizard( + name=Bcolors.OKBLUE+"Understanding Goal, and Preferences"+Bcolors.ENDC, description="", steps=( # The list of input prompts to ask the user. @@ -224,9 +243,76 @@ class AlgoSelectorWizard(): name=Bcolors.HEADER+" What is your goal with the data? Predict, Describe or Explore"+Bcolors.ENDC, # Help message help="Enter one of Predict/Describe/Explore", - validators=(wiz.required_validator), + validators=(wiz.required_validator, wiz.choice_validator(['Predict', + 'predict', + 'Describe', + 'describe', + 'Explore', + 'explore'])), default='Explore' ), + wiz.WizardStep( + # ID where the value will be stored + id="data_metrics_pref", + # Display name + name=Bcolors.HEADER+" Do you know which metrics (speed, accuracy, etc.) are more important for you? "+Bcolors.ENDC, + # Help message + help="Y/N - Yes/No", + validators=(wiz.required_validator, wiz.boolean_validator), + default='Y' + ), + wiz.WizardStep( + # ID where the value will be stored + id="data_main", + # Display name + name=Bcolors.HEADER+" Do you know about the input data type (If its signal/features/text) ? "+Bcolors.ENDC, + # Help message + help="Y/N - Yes/No", + validators=(wiz.required_validator, wiz.boolean_validator), + default='Y' + ), + wiz.WizardStep( + # ID where the value will be stored + id="data_databasic_pref", + # Display name + name=Bcolors.HEADER+" Do you have basic information (size, count, etc.) about the input data? "+Bcolors.ENDC, + # Help message + help="Y/N - Yes/No", + validators=(wiz.required_validator, wiz.boolean_validator), + default='Y' + ), + wiz.WizardStep( + # ID where the value will be stored + id="data_dataadv_pref", + # Display name + name=Bcolors.HEADER+" Do you have advanced information (distribution, relation, independency, etc.) about the input data? "+Bcolors.ENDC, + # Help message + help="Y/N - Yes/No", + validators=(wiz.required_validator, wiz.boolean_validator), + default='Y' + ), + wiz.WizardStep( + # ID where the value will be stored + id="data_dataoutput_pref", + # Display name + name=Bcolors.HEADER+" Do you have basic information (size, count, etc.) about the output? "+Bcolors.ENDC, + # Help message + help="Y/N - Yes/No", + validators=(wiz.required_validator, wiz.boolean_validator), + default='Y' + ), + ) + ) + + def gen_metrics_wizard(self): + """ + Generic Wizard - Goal, metrics, data + """ + self.wiz_generic_metrics = wiz.PromptWizard( + name=Bcolors.OKBLUE+"Understanding Goal, Metrics, Data and Output Type"+Bcolors.ENDC, + description="", + steps=( + # The list of input prompts to ask the user. wiz.WizardStep( # ID where the value will be stored id="metric_accuracy", @@ -234,7 +320,7 @@ class AlgoSelectorWizard(): name=Bcolors.HEADER+" How important the metric 'Accuracy' is for you? 1-5: 1- Least important 5- Most Important"+Bcolors.ENDC, # Help message help="Enter 1-5: 1 being least important, and 5 being most important", - validators=(wiz.required_validator), + validators=(wiz.required_validator, wiz.int_validator(1, 5)), default='1' ), wiz.WizardStep( @@ -244,7 +330,7 @@ class AlgoSelectorWizard(): name=Bcolors.HEADER+" How important the metric 'Speed' is for you? 1-5: 1- Least important 5- Most Important"+Bcolors.ENDC, # Help message help="Enter 1-5: 1 being least important, and 5 being most important", - validators=(wiz.required_validator), + validators=(wiz.required_validator, wiz.int_validator(1, 5)), default='1' ), wiz.WizardStep( @@ -254,7 +340,7 @@ class AlgoSelectorWizard(): name=Bcolors.HEADER+" How important the metric 'Interpretability' is for you? 1-5: 1- Least important 5- Most Important"+Bcolors.ENDC, # Help message help="Enter 1-5: 1 being least important, and 5 being most important", - validators=(wiz.required_validator), + validators=(wiz.required_validator, wiz.int_validator(1, 5)), default='1' ), wiz.WizardStep( @@ -264,7 +350,7 @@ class AlgoSelectorWizard(): name=Bcolors.HEADER+" How important the metric 'Reproducibility' is for you? 1-5: 1- Least important 5- Most Important"+Bcolors.ENDC, # Help message help="Enter 1-5: 1 being least important, and 5 being most important", - validators=(wiz.required_validator), + validators=(wiz.required_validator, wiz.int_validator(1, 5)), default='1' ), wiz.WizardStep( @@ -274,47 +360,73 @@ class AlgoSelectorWizard(): name=Bcolors.HEADER+" How important the metric 'Ease of Implementation and Maintenance' is for you? 1-5: 1- Least important 5- Most Important"+Bcolors.ENDC, # Help message help="Enter 1-5: 1 being least important, and 5 being most important", - validators=(wiz.required_validator), + validators=(wiz.required_validator, wiz.int_validator(1, 5)), default='1' ), + ) + ) + + def gen_data_main_wizard(self): + """ + Generic Wizard - Goal, metrics, data + """ + self.wiz_generic_data_main = wiz.PromptWizard( + name=Bcolors.OKBLUE+"Understanding Goal, and Preferences"+Bcolors.ENDC, + description="", + steps=( + # The list of input prompts to ask the user. wiz.WizardStep( # ID where the value will be stored id="data_column", # Display name - name=Bcolors.HEADER+" What does the data (columns) represent? well defined 'Features', 'signals' (Timeseries, pixels, etc) or Text - (Please type the associated number)"+Bcolors.ENDC, + name=Bcolors.HEADER+" What does the data (columns) represent? Please type help and select the associated number"+Bcolors.ENDC, # Help message - help="1. Well Defined Features\n 2. Signals\n 3. Text - Unstructured\n 4. None of the above\n", - validators=(wiz.required_validator), - default='Features' + help="1. Well Defined Features\n 2. Signals - Timeseries, pixels, etc\n 3. Text - Unstructured\n 4. None of the above\n", + validators=(wiz.required_validator, wiz.int_validator(1, 4)), + default='1' ), + ) + ) + + def gen_data_signal_wizard(self): + """ + Generic Wizard - Goal, metrics, data + """ + self.wiz_generic_data_signal = wiz.PromptWizard( + name=Bcolors.OKBLUE+"Understanding Goal, and Preferences"+Bcolors.ENDC, + description="", + steps=( + # The list of input prompts to ask the user. wiz.WizardStep( # ID where the value will be stored id="data_signal_type", # Display name - name=Bcolors.HEADER+" If Signals, can you choose any one from the below list? "+Bcolors.ENDC, + name=Bcolors.HEADER+" If Signals, can you choose any one from the below list? Please type help for list "+Bcolors.ENDC, # Help message help="1. Image\n 2. Audio\n 3. Timeseries\n 4. None of the above\n 5. Not Applicable\n ", - validators=(wiz.required_validator), - default='3' - ), - wiz.WizardStep( - # ID where the value will be stored - id="data_text_type", - # Display name - name=Bcolors.HEADER+" If Text, can you choose any one from the below list? "+Bcolors.ENDC, - # Help message - help="1. Webpages\n 2. Emails\n 3. Social-Media Posts\n 4. Books\n 5. Formal Articles\n 6. Speech converted to text\n 7. None of the above\n 8. Not Applicable\n ", - validators=(wiz.required_validator), + validators=(wiz.required_validator, wiz.int_validator(1, 5)), default='3' ), + ) + ) + + def gen_data_features_wizard(self): + """ + Generic Wizard - Goal, metrics, data + """ + self.wiz_generic_data_features = wiz.PromptWizard( + name=Bcolors.OKBLUE+"Understanding Goal, and Preferences"+Bcolors.ENDC, + description="", + steps=( + # The list of input prompts to ask the user. wiz.WizardStep( # ID where the value will be stored id="data_features", # Display name name=Bcolors.HEADER+" If features, are they well defined? i.e., are all the variables well understood? "+Bcolors.ENDC, # Help message - help="Y/N/NA", - validators=(wiz.required_validator), + help="Y/N", + validators=(wiz.required_validator, wiz.boolean_validator), default='Y' ), wiz.WizardStep( @@ -323,80 +435,139 @@ class AlgoSelectorWizard(): # Display name name=Bcolors.HEADER+" If features, How many are there? "+Bcolors.ENDC, # Help message - help="Number or NA", - validators=(wiz.required_validator), + help="Number only - Approximate should be OK.", + validators=(wiz.required_validator, wiz.int_validator(1, 100000)), default='10' ), + ) + ) + + def gen_data_text_wizard(self): + """ + Generic Wizard - Goal, metrics, data + """ + self.wiz_generic_data_text = wiz.PromptWizard( + name=Bcolors.OKBLUE+"Understanding Goal, and Preferences"+Bcolors.ENDC, + description="", + steps=( + # The list of input prompts to ask the user. wiz.WizardStep( # ID where the value will be stored - id="data_distribution", + id="data_text_type", # Display name - name=Bcolors.HEADER+" Are you aware of any 'Distribution' that is inherent to the data, we can take advantage of?"+Bcolors.ENDC, + name=Bcolors.HEADER+" If Text, can you choose any one from the below list? Please type help for list"+Bcolors.ENDC, # Help message - help="Y/N/U", - validators=(wiz.required_validator), - default='Y' + help="1. Webpages\n 2. Emails\n 3. Social-Media Posts\n 4. Books\n 5. Formal Articles\n 6. Speech converted to text\n 7. None of the above\n 8. Not Applicable\n ", + validators=(wiz.required_validator, wiz.int_validator(1, 8)), + default='3' ), + + ) + ) + + def gen_about_data_basic_wizard(self): + """ + Generic Wizard - Goal, metrics, data + """ + self.wiz_generic_data_basic = wiz.PromptWizard( + name=Bcolors.OKBLUE+"Understanding Basic Input Data Information"+Bcolors.ENDC, + description="", + steps=( + # The list of input prompts to ask the user. wiz.WizardStep( # ID where the value will be stored - id="data_io_relation", + id="data_missing", # Display name - name=Bcolors.HEADER+" Is the probability of 'Linear Relation' between input and the output is high?"+Bcolors.ENDC, + name=Bcolors.HEADER+" Are there any missing values in the data? "+Bcolors.ENDC, # Help message - help="Y/N/U", - validators=(wiz.required_validator), - default='Y' + help="Y/N", + validators=(wiz.required_validator, wiz.boolean_validator), + default='N' ), wiz.WizardStep( # ID where the value will be stored - id="data_correlation", + id="data_size_bytes", # Display name - name=Bcolors.HEADER+" Are you confident that there is NO high correlation among the independent variables in your day?"+Bcolors.ENDC, + name=Bcolors.HEADER+" How big is the data in terms of size? (Use K/M/G Bytes unit) "+Bcolors.ENDC, # Help message - help="Y/N/U. Change in one ", + help="Number(integer) and unit: K for Kilo, M for Mega and G for Giga. Ex: 10G for 10 Giga bytes", validators=(wiz.required_validator), - default='Y' + default='1G' ), wiz.WizardStep( # ID where the value will be stored - id="data_cond_indep", + id="data_size_samples", # Display name - name=Bcolors.HEADER+" Are you confident that the variables are conditionally independent?"+Bcolors.ENDC, + name=Bcolors.HEADER+" How big is the data in terms of samples? (Use T/M/B Samples) "+Bcolors.ENDC, # Help message - help="Y/N/U. If probability that it rains given lightining and thunder is same as probability that it rains given lightining, then rain and thunder are conditionally independent", + help="Number(integer) and unit: T for Thousand, M for Million and B for Billion. Ex: 1M for 1 Million Samples", validators=(wiz.required_validator), + default='1M' + ), + ) + ) + + def gen_about_data_advanced_wizard(self): + """ + Generic Wizard - Goal, metrics, data + """ + self.wiz_generic_data_adv = wiz.PromptWizard( + name=Bcolors.OKBLUE+"Understanding Advanced Input Data Information"+Bcolors.ENDC, + description="", + steps=( + # The list of input prompts to ask the user. + wiz.WizardStep( + # ID where the value will be stored + id="data_distribution", + # Display name + name=Bcolors.HEADER+" Are you aware of any 'Distribution' that is inherent to the data, we can take advantage of?"+Bcolors.ENDC, + # Help message + help="Y/N - Yes", + validators=(wiz.required_validator, wiz.boolean_validator), default='Y' ), wiz.WizardStep( # ID where the value will be stored - id="data_missing", + id="data_io_relation", # Display name - name=Bcolors.HEADER+" Are there any missing values in the data? "+Bcolors.ENDC, + name=Bcolors.HEADER+" Is the probability of 'Linear Relation' between input and the output is high?"+Bcolors.ENDC, # Help message - help="Y/N/U", - validators=(wiz.required_validator), - default='N' + help="Y/N - Yes/No", + validators=(wiz.required_validator, wiz.boolean_validator), + default='Y' ), wiz.WizardStep( # ID where the value will be stored - id="data_size_bytes", + id="data_correlation", # Display name - name=Bcolors.HEADER+" How big is the data in terms of size? (Use K/M/G Bytes unit) "+Bcolors.ENDC, + name=Bcolors.HEADER+" Are you confident that there is NO high correlation among the independent variables in your day?"+Bcolors.ENDC, # Help message - help="Number(integer) and unit: K for Kilo, M for Mega and G for Giga. Ex: 10G for 10 Giga bytes", - validators=(wiz.required_validator), - default='1G' + help="Y/N/ - Yes/No ", + validators=(wiz.required_validator, wiz.boolean_validator), + default='Y' ), wiz.WizardStep( # ID where the value will be stored - id="data_size_samples", + id="data_cond_indep", # Display name - name=Bcolors.HEADER+" How big is the data in terms of samples? (Use T/M/B Samples) "+Bcolors.ENDC, + name=Bcolors.HEADER+" Are you confident that the variables are conditionally independent?"+Bcolors.ENDC, # Help message - help="Number(integer) and unit: T for Thousand, M for Million and B for Billion. Ex: 1M for 1 Million Samples", - validators=(wiz.required_validator), - default='1M' + help="Y/N/. If probability that it rains given lightining and thunder is same as probability that it rains given lightining, then rain and thunder are conditionally independent", + validators=(wiz.required_validator, wiz.boolean_validator), + default='Y' ), + ) + ) + + def gen_about_output_wizard(self): + """ + Generic Wizard - Goal, metrics, data + """ + self.wiz_generic_data_output = wiz.PromptWizard( + name=Bcolors.OKBLUE+"Understanding Data Output"+Bcolors.ENDC, + description="", + steps=( + # The list of input prompts to ask the user. wiz.WizardStep( # ID where the value will be stored id="data_type_output", @@ -404,7 +575,7 @@ class AlgoSelectorWizard(): name=Bcolors.HEADER+" What is the expected output data type ? (Please type number associated with type in 'help') "+Bcolors.ENDC, # Help message help=" 1:Numerical-Discrete\n 2:Numerical-Continuous\n 3:Ordinal\n 4:Categorical-Binary\n 5:Categorical-Multiclass", - validators=(wiz.required_validator), + validators=(wiz.required_validator, wiz.int_validator(1, 5)), default='1' ), wiz.WizardStep( @@ -414,7 +585,7 @@ class AlgoSelectorWizard(): name=Bcolors.HEADER+" Is the expected output data a probability value ? "+Bcolors.ENDC, # Help message help="Y/N", - validators=(wiz.required_validator), + validators=(wiz.required_validator, wiz.boolean_validator), default='N' ), ) @@ -425,7 +596,7 @@ class AlgoSelectorWizard(): """ The Un-Supervized Learning Wizard """ - self.wiz_generic = wiz.PromptWizard( + self.wiz_unsupervised = wiz.PromptWizard( name=Bcolors.OKBLUE+"Understanding Goal, Metrics, Data and Output Type"+Bcolors.ENDC, description="", steps=( @@ -437,7 +608,7 @@ class AlgoSelectorWizard(): name=Bcolors.HEADER+" What is the main goal? (Please type number associated with type in 'help')"+Bcolors.ENDC, # Help message help="1: Explore Similar Groups (clustering) \n 2: Perform Dimensionality Reduction\n 3: Others\n", - validators=(wiz.required_validator), + validators=(wiz.required_validator, wiz.int_validator(1, 3)), default='1' ), wiz.WizardStep( @@ -447,7 +618,8 @@ class AlgoSelectorWizard(): name=Bcolors.HEADER+" If dimensionality reduction, do you prefer topic modelling ? (Please type NA is you are not sure)"+Bcolors.ENDC, # Help message help="Y/N/NA", - validators=(wiz.required_validator), + validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na', + 'y','n','na','nA'])), default='NA' ), wiz.WizardStep( @@ -457,7 +629,8 @@ class AlgoSelectorWizard(): name=Bcolors.HEADER+" Are you aware of density variations in your data ? (Please type NA is you are not sure)"+Bcolors.ENDC, # Help message help="Y/N/NA", - validators=(wiz.required_validator), + validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na', + 'y','n','na','nA'])), default='NA' ), wiz.WizardStep( @@ -467,7 +640,8 @@ class AlgoSelectorWizard(): name=Bcolors.HEADER+" Are there too many outliers in your data ? (Please type NA is you are not sure)"+Bcolors.ENDC, # Help message help="Y/N/NA", - validators=(wiz.required_validator), + validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na', + 'y','n','na','nA'])), default='NA' ), wiz.WizardStep( @@ -477,7 +651,8 @@ class AlgoSelectorWizard(): name=Bcolors.HEADER+" If clustering, do you know how many groups to form? (Please type NA is you are not sure)"+Bcolors.ENDC, # Help message help="Y/N/NA", - validators=(wiz.required_validator), + validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na', + 'y','n','na','nA'])), default='NA' ), @@ -522,7 +697,8 @@ class AlgoSelectorWizard(): name=Bcolors.HEADER+" Do you prefer model-based approach? (Type NA if you are not sure) "+Bcolors.ENDC, # Help message help="Y/N/NA", - validators=(wiz.required_validator), + validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na', + 'y','n','na','nA'])), default='Y' ), wiz.WizardStep( @@ -532,7 +708,8 @@ class AlgoSelectorWizard(): name=Bcolors.HEADER+" Do you have a model for model-based approach? (Type NA if not applicable) "+Bcolors.ENDC, # Help message help="Y/N/NA", - validators=(wiz.required_validator), + validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na', + 'y','n','na','nA'])), default='Y' ), wiz.WizardStep( @@ -542,7 +719,8 @@ class AlgoSelectorWizard(): name=Bcolors.HEADER+" In Model-Free approach, do you prefer value-based approach? (Type NA if not applicable) "+Bcolors.ENDC, # Help message help="Y/N/NA", - validators=(wiz.required_validator), + validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na', + 'y','n','na','nA'])), default='Y' ), wiz.WizardStep( @@ -552,7 +730,8 @@ class AlgoSelectorWizard(): name=Bcolors.HEADER+" In Model-Free Value-Based approach, do you prefer state-only model? (Type NA if not applicable) "+Bcolors.ENDC, # Help message help="Y/N/NA", - validators=(wiz.required_validator), + validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na', + 'y','n','na','nA'])), default='Y' ), wiz.WizardStep( @@ -562,7 +741,7 @@ class AlgoSelectorWizard(): name=Bcolors.HEADER+" What is the application domain ? (Please type number associated with type in 'help') "+Bcolors.ENDC, # Help message help=" 1:Computer Resource Mgmt.\n 2:Robotics\n 3:Traffic-Control\n 4:Reccommenders\n 5:Autonomous Vehicles\n 6:Games\n 7:Chemistry\n 8:Others\n", - validators=(wiz.required_validator), + validators=(wiz.required_validator, wiz.int_validator(1, 8)), default='1' ), ) @@ -575,25 +754,26 @@ class AlgoSelectorWizard(): """ self.main_wizard_l1() self.main_l1_values = self.wiz_main_l1.run(self.shell) - if self.main_l1_values['data_availability'].lower() == 'y': + if self.main_l1_values['data_availability']: + print("OK-1") self.main_wizard_l2_b() self.main_l2b_values = self.wiz_main_l2_b.run(self.shell) - if self.main_l2b_values['data_labe'].lower() == 'y': + if self.main_l2b_values['data_label']: self.supervised = True else: self.unsupervised = True - if self.main_l2b_values['data_programmability'].lower() == 'y': + if self.main_l2b_values['data_programmability']: print(Bcolors.FAIL+"ML is not required - Please consider alternate approaches\n"+Bcolors.ENDC) else: self.main_wizard_l3() self.main_l3_values = self.wiz_main_l3.run(self.shell) - if self.main_l3_values['data_knowledge'].lower() == 'y': + if self.main_l3_values['data_knowledge']: print(Bcolors.OKGREEN+"Looks like you need ML, let's continue"+Bcolors.ENDC) self.ml_needed = True else: self.main_wizard_l4() self.main_l4_values = self.wiz_main_l4.run(self.shell) - if self.main_l4_values['data_pattern'].lower() == 'y': + if self.main_l4_values['data_pattern']: print(Bcolors.OKGREEN+"Looks like you need ML, let's continue"+Bcolors.ENDC) self.ml_needed = True else: @@ -601,7 +781,7 @@ class AlgoSelectorWizard(): else: self.main_wizard_l2_a() self.main_l2a_values = self.wiz_main_l2_a.run(self.shell) - if self.main_l2a_values['data_creativity'].lower() == 'y': + if self.main_l2a_values['data_creativity']: print(Bcolors.OKGREEN+"Looks like you need ML, let's continue"+Bcolors.ENDC) self.ml_needed = True self.reinforcement = True @@ -612,8 +792,55 @@ class AlgoSelectorWizard(): """ Run Generic Wizard """ - self.gen_wizard() - self.gen_values = self.wiz_generic.run(self.shell) + self.gen_choice_wizard() + self.gen_choice_values = self.wiz_generic_choice.run(self.shell) + if self.gen_choice_values['data_metrics_pref']: + self.gen_metrics_wizard() + self.gen_metrics_values = self.wiz_generic_metrics.run(self.shell) + if self.gen_choice_values['data_main']: + self.gen_data_main_wizard() + self.gen_data_main_values = self.wiz_generic_data_main.run(self.shell) + if int(self.gen_data_main_values['data_column']) == 3: + self.gen_data_text_wizard() + self.gen_data_text_values = self.wiz_generic_data_text.run(self.shell) + else: + self.gen_data_text_values = {'data_text_type': '3'} + if int(self.gen_data_main_values['data_column']) == 1: + self.gen_data_features_wizard() + self.gen_data_features_values = self.wiz_generic_data_features.run(self.shell) + else: + self.gen_data_features_values = {'data_features': 'Y', + 'data_features_count': '10'} + if int(self.gen_data_main_values['data_column']) == 2: + self.gen_data_signal_wizard() + self.gen_data_signal_values = self.wiz_generic_data_signal.run(self.shell) + else: + self.gen_data_signal_values = {'data_signal_type': '1'} + else: + self.gen_data_main_values = {'data_column': '1'} + print("Unknown Data Type") + if self.gen_choice_values['data_databasic_pref']: + self.gen_about_data_basic_wizard() + self.gen_about_data_basic_values = self.wiz_generic_data_basic.run(self.shell) + else: + self.gen_about_data_basic_values = {'data_missing':'N', + 'data_size_bytes': '1G', + 'data_size_samples': '1M'} + if self.gen_choice_values['data_dataadv_pref']: + self.gen_about_data_advanced_wizard() + self.gen_about_data_adv_values = self.wiz_generic_data_adv.run(self.shell) + else: + self.gen_about_data_adv_values = {'data_distribution': 'N', + 'data_io_relation': 'N', + 'data_correlation': 'N', + 'data_cond_indep': 'N'} + if self.gen_choice_values['data_dataoutput_pref']: + self.gen_about_output_wizard() + self.gen_about_data_output_values = self.wiz_generic_data_output.run(self.shell) + else: + self.gen_about_data_output_values = {'data_type_output': '1', + 'data_output_prob': 'N'} + def run_unsupervised_wizard(self): """ @@ -643,8 +870,8 @@ class AlgoSelectorWizard(): else: repro = True else: - if 'y' in self.unsup_values['unsup_clus_dv'].tolower(): - if 'y' in self.unsup_values['unsup_clus_groups'].tolower(): + if 'y' in self.unsup_values['unsup_clus_dv'].lower(): + if 'y' in self.unsup_values['unsup_clus_groups'].lower(): clus_prob = True else: print("Unsupervised Learning model to consider: Hierarchical Clustering") @@ -652,21 +879,21 @@ class AlgoSelectorWizard(): else: repro = True if repro: - if 'y' in self.unsup_values['unsup_clus_outliers'].tolower(): + if 'y' in self.unsup_values['unsup_clus_outliers'].lower(): print("Unsupervised Learning model to consider: Hierarchical Clustering") else: print("Unsupervised Learning model to consider: DBSCAN") return if clus_prob: - if 'y' in self.gen_values['data_output_prob'].tolower(): + if 'y' in self.gen_about_data_output_values['data_output_prob'].lower(): print("Unsupervised Learning model to consider: Gaussian Mixture") else: print("Unsupervised Learning model to consider: KMeans") return elif int(self.unsup_values['unsup_goal']) == 2: # Dimensionality Reduction - if 'y' in self.unsup_values['unsup_dr_topic_mod'].tolower(): - if 'y' in self.gen_values['data_output_prob'].tolower(): + if 'y' in self.unsup_values['unsup_dr_topic_mod'].lower(): + if 'y' in self.gen_about_data_output_values['data_output_prob'].lower(): print("Unsupervised Learning model to consider: SVD") else: print("Unsupervised Learning model to consider: LDA") @@ -679,25 +906,25 @@ class AlgoSelectorWizard(): """ Decide which reinforement learning to use. """ - if (int(self.gen_values['data_type_output']) == 2 or - 'y' in self.ri_values['ri_model_preference'].tolower()): + if (int(self.gen_about_data_output_values['data_type_output']) == 2 or + 'y' in self.ri_values['ri_model_preference'].lower()): # Model Bsaed - if 'y' in self.ri_values['ri_model_availability'].tolower(): + if 'y' in self.ri_values['ri_model_availability'].lower(): print("Reinforcement Learning model to consider - AlphaZero") else: print("Reinforcement Learning models to consider - World Models, I2A, MBMF, and MBVE") - elif 'n' in self.ri_values['ri_model_preference'].tolower(): + elif 'n' in self.ri_values['ri_model_preference'].lower(): # Model-Free based approach. - if 'y' not in self.ri_values['ri_modelfree_value'].tolower(): + if 'y' not in self.ri_values['ri_modelfree_value'].lower(): print("Reinforcement Learning models to consider: Policy Gradient and Actor Critic") else: - if 'y' in self.ri_values['ri_modelfree_value_state'].tolower(): + if 'y' in self.ri_values['ri_modelfree_value_state'].lower(): print("Reinforcement Learning models to consider - Monte Carlo, TD(0), and TD(Lambda)") else: print("Reinforcement Learning models to consider - SARSA, QLearning, Deep Queue Nets") else: # Default - print("Sorry. We need to discuss, please connect with Anuket Thoth Project ") + print("Sorry. We need to discuss, please connect with Anuket Thoth Project ") def perform_inference(self): """ @@ -705,28 +932,28 @@ class AlgoSelectorWizard(): """ # Decide whether data is Low or High self.data_size = 'unknown' - if ('k' in self.gen_values['data_size_bytes'].lower() or - 't' in self.gen_values['data_size_samples']): + if ('k' in self.gen_about_data_basic_values['data_size_bytes'].lower() or + 't' in self.gen_about_data_basic_values['data_size_samples']): self.data_size = 'low' - if int(self.gen_values['metric_interpretability']) >= 3 : + if int(self.gen_metrics_values['metric_interpretability']) >= 3 : self.interpretability = True - if int(self.gen_values['metric_speed']) >= 3 : + if int(self.gen_metrics_values['metric_speed']) >= 3 : self.faster = True - if int(self.gen_values['metric_reproducibility']) >= 3 : + if int(self.gen_metrics_values['metric_reproducibility']) >= 3 : self.reproducibility = True # Decide Features relative to Data (ftod_ratio) - high/low - if ('k' in self.gen_values['data_size_bytes'].lower() or - 't' in self.gen_values['data_size_samples']): - if int(self.gen_values['data_features_count']) > 50: + if ('k' in self.gen_about_data_basic_values['data_size_bytes'].lower() or + 't' in self.gen_about_data_basic_values['data_size_samples']): + if int(self.gen_data_features_values['data_features_count']) > 50: self.ftod_ratio = 'high' - elif ('m' in self.gen_values['data_size_bytes'].lower() or - 'm' in self.gen_values['data_size_samples']): - if int(self.gen_values['data_features_count']) > 5000: + elif ('m' in self.gen_about_data_basic_values['data_size_bytes'].lower() or + 'm' in self.gen_about_data_basic_values['data_size_samples']): + if int(self.gen_data_features_values['data_features_count']) > 5000: self.ftod_ratio = 'high' else: - if int(self.gen_values['data_features_count']) > 500000: + if int(self.gen_data_features_values['data_features_count']) > 500000: self.ftod_ratio = 'high' @@ -742,15 +969,15 @@ class AlgoSelectorWizard(): else: print("Supervised Learning model to consider - Random Forest") else: - if int(self.gen_values['data_column']) == 3: + if int(self.gen_data_main_values['data_column']) == 3: print("Supervised Learning model to consider - RNN") - elif (int(self.gen_values['data_column']) == 2 and - int(self.gen_values['data_signal_type']) == 1): + elif (int(self.gen_data_main_values['data_column']) == 2 and + int(self.gen_data_signal_values['data_signal_type']) == 1): print("Supervised Learning model to consider - CNN") - elif (int(self.gen_values['data_column']) == 2 and - (int(self.gen_values['data_signal_type']) == 2 or - int(self.gen_values['data_signal_type']) == 3)): - if 'y' in self.gen_values['data_output_prob'].tolower(): + elif (int(self.gen_data_main_values['data_column']) == 2 and + (int(self.gen_data_signal_values['data_signal_type']) == 2 or + int(self.gen_data_signal_values['data_signal_type']) == 3)): + if 'y' in self.gen_about_data_output_values['data_output_prob'].lower(): print("Supervised Learning model to consider - Naive Bayes") else: print("Supervised Learning model to consider - ANN") @@ -764,21 +991,21 @@ class AlgoSelectorWizard(): else: print("Supervised Learning model to consider - SVN with Gaussian Kernel") return - if int(self.gen_values['data_type_output']) != 2: + if int(self.gen_about_data_output_values['data_type_output']) != 2: from_b = True else: - if 'y' in self.gen_values['data_io_relation'].tolower(): + if 'y' in self.gen_about_data_adv_values['data_io_relation'].lower(): print("Supervised Learning model to consider - Linear Regression or Linear SVM") else: print("Supervised Learning model to consider - Polynomial Regression or nonLinear SVM") return if from_b: - if int(self.gen_values['data_output_type']) == 4: - if 'y' in self.gen_values['data_output_prob'].tolower(): - if 'y' in self.gen_values['data_cond_indep'].tolower(): + if int(self.gen_about_data_output_values['data_output_type']) == 4: + if 'y' in self.gen_about_data_output_values['data_output_prob'].lower(): + if 'y' in self.gen_about_data_adv_values['data_cond_indep'].lower(): print("Supervised Learning model to consider - Naive Bayes") else: - if 'y' in self.gen_values['data_correlation'].tolower(): + if 'y' in self.gen_about_data_adv_values['data_correlation'].lower(): print("Supervised Learning model to consider - LASSO or Ridge Regression") else: print("Supervised Learning model to consider - Logistic Regression")