-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Fix ResultProcessor bug, LogisticRegression bug and missing value conversion bug #1236
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
+56,347
−59
Merged
Changes from all commits
Commits
Show all changes
23 commits
Select commit
Hold shift + click to select a range
725199f
Fix some bugs, add some unit tests.
yaeldMS 31bd29c
Fix LR stats bug
yaeldMS 44a1789
Merge branch 'master' into bugfixes
yaeldMS 4db39ca
Undo accidental TermTransform change
yaeldMS 0f7d6de
Sweeper needs to load all components into ComponentCatalog
yaeldMS 944cf23
Rename Mapping.de-de.txt
yaeldMS 3026138
Fix cat transform issue
yaeldMS 58f7e2a
Compare pr baseline only on Windows
yaeldMS 92bb0f5
Merge branch 'master' into bugfixes
yaeldMS e7d2e9e
Move baselines to Common folder
yaeldMS f4b143b
Compare pr baseline only on Windows in another test
yaeldMS e64a447
Code review comment
yaeldMS 7710d9c
Fix ConcatTransform bug
yaeldMS a848fc3
Add baselines for ConcatTransform bug
yaeldMS c20a1b2
Fix another bug in TermTransform
yaeldMS 43a4481
Merge branch 'master' into bugfixes
yaeldMS d261045
NelderMead sweeper default value for FirstBatchSweeper arg
yaeldMS 7f608e1
Add some more unit tests
yaeldMS 745624e
Add more unit tests
yaeldMS 5069a43
Fix unit test baseline, and baseline comparison with tolerance.
yaeldMS b6d5992
Change back MatchWithTolerance method
yaeldMS 068954a
Merge with master
yaeldMS b44d4a6
Fix bad merge
yaeldMS File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -135,18 +135,20 @@ public static IDataTransform Create(IHostEnvironment env, Arguments args, IDataV | |
column.MaxNumTerms ?? args.MaxNumTerms, | ||
column.Sort ?? args.Sort, | ||
column.Term ?? args.Term); | ||
col.SetTerms(column.Terms); | ||
col.SetTerms(column.Terms ?? args.Terms); | ||
columns.Add(col); | ||
} | ||
return new CategoricalEstimator(env, columns.ToArray()).Fit(input).Transform(input) as IDataTransform; | ||
return new CategoricalEstimator(env, columns.ToArray(), args.DataFile, args.TermsColumn, args.Loader).Fit(input).Transform(input) as IDataTransform; | ||
} | ||
|
||
private readonly TransformerChain<ITransformer> _transformer; | ||
|
||
public CategoricalTransform(TermEstimator term, IEstimator<ITransformer> toVector, IDataView input) | ||
{ | ||
var chain = term.Append(toVector); | ||
_transformer = chain.Fit(input); | ||
if (toVector != null) | ||
_transformer = term.Append(toVector).Fit(input); | ||
else | ||
_transformer = new TransformerChain<ITransformer>(term.Fit(input)); | ||
} | ||
|
||
public Schema GetOutputSchema(Schema inputSchema) => _transformer.GetOutputSchema(inputSchema); | ||
|
@@ -198,15 +200,17 @@ internal void SetTerms(string terms) | |
/// <param name="outputKind">The type of output expected.</param> | ||
public CategoricalEstimator(IHostEnvironment env, string input, | ||
string output = null, CategoricalTransform.OutputKind outputKind = Defaults.OutKind) | ||
: this(env, new ColumnInfo(input, output ?? input, outputKind)) | ||
: this(env, new[] { new ColumnInfo(input, output ?? input, outputKind) }) | ||
{ | ||
} | ||
|
||
public CategoricalEstimator(IHostEnvironment env, params ColumnInfo[] columns) | ||
public CategoricalEstimator(IHostEnvironment env, ColumnInfo[] columns, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Out of curiosity, can you make this constructor internal? I would prefer not to pollute our public API with these things. |
||
string file = null, string termsColumn = null, | ||
IComponentFactory<IMultiStreamSource, IDataLoader> loaderFactory = null) | ||
{ | ||
Contracts.CheckValue(env, nameof(env)); | ||
_host = env.Register(nameof(TermEstimator)); | ||
_term = new TermEstimator(_host, columns); | ||
_term = new TermEstimator(_host, columns, file, termsColumn, loaderFactory); | ||
var binaryCols = new List<(string input, string output)>(); | ||
var cols = new List<(string input, string output, bool bag)>(); | ||
for (int i = 0; i < columns.Length; i++) | ||
|
1 change: 1 addition & 0 deletions
1
test/BaselineOutput/Common/Command/CommandTrainMlrWithStats-1-out.txt
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Saving predictor summary |
15 changes: 15 additions & 0 deletions
15
test/BaselineOutput/Common/Command/CommandTrainMlrWithStats-out.txt
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
maml.exe Train tr=MultiClassLogisticRegression{maxiter=100 t=- stat=+} loader=TextLoader{col=Label:TX:4 col=Features:R4:0-3 sep=,} data=%Data% out=%Output% seed=1 xf=Term{col=Label} | ||
Automatically adding a MinMax normalization transform, use 'norm=Warn' or 'norm=No' to turn this behavior off. | ||
Beginning optimization | ||
num vars: 15 | ||
improvement criterion: Mean Improvement | ||
L1 regularization selected 11 of 15 weights. | ||
Model trained with 150 training examples. | ||
Residual Deviance: 132.0122 | ||
Null Deviance: 329.5837 | ||
AIC: 154.0122 | ||
Not training a calibrator because it is not needed. | ||
Physical memory usage(MB): %Number% | ||
Virtual memory usage(MB): %Number% | ||
%DateTime% Time elapsed(s): %Number% | ||
|
18 changes: 18 additions & 0 deletions
18
test/BaselineOutput/Common/Command/CommandTrainMlrWithStats-summary.txt
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
MulticlassLogisticRegression bias and non-zero weights | ||
Iris-setosa+(Bias) 2.265129 | ||
Iris-versicolor+(Bias) 0.7695086 | ||
Iris-virginica+(Bias) -3.034663 | ||
Iris-setosa+f3 -3.180634 | ||
Iris-setosa+f2 -2.88663 | ||
Iris-setosa+f1 0.5392878 | ||
Iris-setosa+f0 -0.03958065 | ||
Iris-versicolor+f1 -0.7073272 | ||
Iris-virginica+f3 3.158146 | ||
Iris-virginica+f2 1.907791 | ||
Iris-virginica+f0 0.01793481 | ||
|
||
*** MODEL STATISTICS SUMMARY *** | ||
Count of training examples: 150 | ||
Residual Deviance: 132.0122 | ||
Null Deviance: 329.5837 | ||
AIC: 154.0122 |
1 change: 1 addition & 0 deletions
1
test/BaselineOutput/Common/Command/CommandTrainingLrWithStats-1-out.txt
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Saving predictor summary |
15 changes: 15 additions & 0 deletions
15
test/BaselineOutput/Common/Command/CommandTrainingLrWithStats-out.txt
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
maml.exe Train feat=Num lab=Lab tr=lr{t=- stat=+} loader=text{header+ sep=comma col=Lab:14 col=Num:0,2,4,10-12} data=%Data% out=%Output% | ||
Automatically adding a MinMax normalization transform, use 'norm=Warn' or 'norm=No' to turn this behavior off. | ||
Beginning optimization | ||
num vars: 7 | ||
improvement criterion: Mean Improvement | ||
L1 regularization selected 7 of 7 weights. | ||
Model trained with 32561 training examples. | ||
Residual Deviance: 26705.74 (on 32554 degrees of freedom) | ||
Null Deviance: 35948.08 (on 32560 degrees of freedom) | ||
AIC: 26719.74 | ||
Not training a calibrator because it is not needed. | ||
Physical memory usage(MB): %Number% | ||
Virtual memory usage(MB): %Number% | ||
%DateTime% Time elapsed(s): %Number% | ||
|
15 changes: 15 additions & 0 deletions
15
test/BaselineOutput/Common/Command/CommandTrainingLrWithStats-summary.txt
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
Linear Binary Classification Predictor non-zero weights | ||
|
||
(Bias) -8.228298 | ||
capital-gain 18.58347 | ||
education-num 5.066041 | ||
hours-per-week 3.946534 | ||
age 3.86064 | ||
capital-loss 2.81616 | ||
fnlwgt 0.7489593 | ||
|
||
*** MODEL STATISTICS SUMMARY *** | ||
Count of training examples: 32561 | ||
Residual Deviance: 26705.74 | ||
Null Deviance: 35948.08 | ||
AIC: 26719.74 |
10 changes: 10 additions & 0 deletions
10
test/BaselineOutput/Common/EntryPoints/ensemble-model0-stats.txt
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
#@ TextLoader{ | ||
#@ header+ | ||
#@ sep=tab | ||
#@ col={name={Count of training examples} type=I8 src=0} | ||
#@ col={name={Residual Deviance} type=R4 src=1} | ||
#@ col={name={Null Deviance} type=R4 src=2} | ||
#@ col=AIC:R4:3 | ||
#@ } | ||
Count of training examples Residual Deviance Null Deviance AIC | ||
521 98.29433 669.0935 118.294327 |
8 changes: 8 additions & 0 deletions
8
test/BaselineOutput/Common/EntryPoints/ensemble-model0-summary.txt
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
#@ TextLoader{ | ||
#@ header+ | ||
#@ sep=tab | ||
#@ col=Bias:R4:0 | ||
#@ col=Weights:R4:1-17 | ||
#@ } | ||
Bias Features.thickness Features.uniform_size Features.uniform_shape Features.adhesion Features.epit_size Features.bare_nuclei Features.bland_chromatin Features.normal_nucleoli Cat.1 Cat.5 Cat.4 Cat.3 Cat.2 Cat.7 Cat.10 Cat.8 Cat.6 | ||
-5.120674 2.353567 1.78653753 1.9442488 1.38072 1.0831089 2.43588924 1.61141682 1.34575915 -0.7715381 0 0 0 0 0 0 0 0 |
7 changes: 7 additions & 0 deletions
7
test/BaselineOutput/Common/EntryPoints/ensemble-model1-summary.txt
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
#@ TextLoader{ | ||
#@ header+ | ||
#@ sep=tab | ||
#@ col=Gains:R4:0-16 | ||
#@ } | ||
Cat.1 Cat.4 Cat.2 Cat.5 Cat.10 Cat.3 Cat.7 Cat.8 Cat.6 Features.thickness Features.uniform_size Features.uniform_shape Features.adhesion Features.epit_size Features.bare_nuclei Features.bland_chromatin Features.normal_nucleoli | ||
0.0607880056 0 0.0249023773 0 0 4.10026857E-09 0 0 0 0.190965369 1 0.7112387 0.14315024 0.222178861 0.413435966 0.254190356 0.2604484 |
10 changes: 10 additions & 0 deletions
10
test/BaselineOutput/Common/EntryPoints/ensemble-model2-stats.txt
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
#@ TextLoader{ | ||
#@ header+ | ||
#@ sep=tab | ||
#@ col={name={Count of training examples} type=I8 src=0} | ||
#@ col={name={Residual Deviance} type=R4 src=1} | ||
#@ col={name={Null Deviance} type=R4 src=2} | ||
#@ col=AIC:R4:3 | ||
#@ } | ||
Count of training examples Residual Deviance Null Deviance AIC | ||
520 94.1969452 673.3445 114.196945 |
8 changes: 8 additions & 0 deletions
8
test/BaselineOutput/Common/EntryPoints/ensemble-model2-summary.txt
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
#@ TextLoader{ | ||
#@ header+ | ||
#@ sep=tab | ||
#@ col=Bias:R4:0 | ||
#@ col=Weights:R4:1-17 | ||
#@ } | ||
Bias Features.thickness Features.uniform_size Features.uniform_shape Features.adhesion Features.epit_size Features.bare_nuclei Features.bland_chromatin Features.normal_nucleoli Cat.1 Cat.5 Cat.4 Cat.2 Cat.3 Cat.7 Cat.10 Cat.8 Cat.6 | ||
-4.860323 2.143086 1.49418533 1.71121442 1.38318741 0.883200347 3.16845965 1.38684654 1.51904845 -0.8226236 0 0 0 0 0 0 0 0 |
7 changes: 7 additions & 0 deletions
7
test/BaselineOutput/Common/EntryPoints/ensemble-model3-summary.txt
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
#@ TextLoader{ | ||
#@ header+ | ||
#@ sep=tab | ||
#@ col=Gains:R4:0-16 | ||
#@ } | ||
Cat.1 Cat.5 Cat.2 Cat.4 Cat.3 Cat.7 Cat.10 Cat.8 Cat.6 Features.thickness Features.uniform_size Features.uniform_shape Features.adhesion Features.epit_size Features.bare_nuclei Features.bland_chromatin Features.normal_nucleoli | ||
0.009761757 0 0.0203766879 0 0.000928933 0 0 0 0 0.308038682 1 0.5590685 0.125412315 0.118880585 0.488731444 0.308761537 0.132577017 |
58 changes: 58 additions & 0 deletions
58
test/BaselineOutput/Common/EntryPoints/ensemble-summary-key-value-pairs.txt
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
Partition model 0 summary: | ||
Linear Binary Classification Predictor non-zero weights | ||
(Bias): -5.120674 | ||
Features.bare_nuclei: 2.435889 | ||
Features.thickness: 2.353567 | ||
Features.uniform_shape: 1.944249 | ||
Features.uniform_size: 1.786538 | ||
Features.bland_chromatin: 1.611417 | ||
Features.adhesion: 1.38072 | ||
Features.normal_nucleoli: 1.345759 | ||
Features.epit_size: 1.083109 | ||
Cat.1: -0.7715381 | ||
Count of training examples: 521 | ||
Residual Deviance: 98.29433 | ||
Null Deviance: 669.0935 | ||
AIC: 118.2943 | ||
Partition model 1 summary: | ||
Per-feature gain summary for the boosted tree ensemble: | ||
Features.uniform_size: 1 | ||
Features.uniform_shape: 0.711238682354263 | ||
Features.bare_nuclei: 0.413435971399054 | ||
Features.normal_nucleoli: 0.260448393604327 | ||
Features.bland_chromatin: 0.254190368593018 | ||
Features.epit_size: 0.222178863469679 | ||
Features.thickness: 0.190965373645692 | ||
Features.adhesion: 0.143150245168852 | ||
Cat.1: 0.0607880054395048 | ||
Cat.2: 0.0249023775790133 | ||
Cat.3: 4.10026871732935E-09 | ||
Partition model 2 summary: | ||
Linear Binary Classification Predictor non-zero weights | ||
(Bias): -4.860323 | ||
Features.bare_nuclei: 3.16846 | ||
Features.thickness: 2.143086 | ||
Features.uniform_shape: 1.711214 | ||
Features.normal_nucleoli: 1.519048 | ||
Features.uniform_size: 1.494185 | ||
Features.bland_chromatin: 1.386847 | ||
Features.adhesion: 1.383187 | ||
Features.epit_size: 0.8832003 | ||
Cat.1: -0.8226236 | ||
Count of training examples: 520 | ||
Residual Deviance: 94.19695 | ||
Null Deviance: 673.3445 | ||
AIC: 114.1969 | ||
Partition model 3 summary: | ||
Per-feature gain summary for the boosted tree ensemble: | ||
Features.uniform_size: 1 | ||
Features.uniform_shape: 0.559068504082849 | ||
Features.bare_nuclei: 0.488731457203164 | ||
Features.bland_chromatin: 0.308761540884501 | ||
Features.thickness: 0.308038677882308 | ||
Features.normal_nucleoli: 0.132577017456797 | ||
Features.adhesion: 0.125412316945858 | ||
Features.epit_size: 0.118880587537871 | ||
Cat.2: 0.0203766881332348 | ||
Cat.1: 0.00976175711400017 | ||
Cat.3: 0.000928932959407758 |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Heh heh. Whoops! #Resolved