I am training a model and for this I need to make an attributes section (with RFECV) and optimize the parameters of the model (GridSearchCV).
Code
model = LogisticRegression() #algoritmo
my_scorer = make_scorer(score, greater_is_better=True) #Se crea el'score' de la función propia creada
generador_train = GroupKFold(n_splits=10).split(X_train, y_train, order_train) #Generador para creación de los 10 splits siguiendo un orden dado
C= {'C': 10. ** np.arange(-3, 4)} #valores de C
scaler = preprocessing.StandardScaler() #Estandarizado
selector =RFECV(cv=generador_train, estimator=model,scoring=my_scorer) #Seleccion de atributos
pipe=Pipeline([('scaler', scaler),('select', selector),('model', model)]) # Se crea la pipeline
grid = GridSearchCV(estimator=pipe, param_grid=C,cv=generador_train,scoring=my_scorer,refit=True) #Se declara el gridSearch con CV
grid.fit(X_train, y_train) # Se ejecuta la pipeline
best_pipe=grid.best_estimator_
When executing the previous code I get the error:
- TypeError Traceback (most recent call
> last) <ipython-input-34-9d038a773283> in <module>()
> 17
> 18 grid = GridSearchCV(estimator=pipe, param_grid=C,cv=generador_train,scoring=my_scorer,refit=True) #Se
> declara el gridSearch con CV
> ---> 19 grid.fit(X_train,y_train)
> 20 best_pipe=grid.best_estimator_
>
> AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\model_selection\_search.py
> in fit(self, X, y, groups, **fit_params)
> 622 n_candidates * n_splits))
> 623
> --> 624 base_estimator = clone(self.estimator)
> 625 pre_dispatch = self.pre_dispatch
> 626
>
> AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\base.py
> in clone(estimator, safe)
> 59 new_object_params = estimator.get_params(deep=False)
> 60 for name, param in six.iteritems(new_object_params):
> ---> 61 new_object_params[name] = clone(param, safe=False)
> 62 new_object = klass(**new_object_params)
> 63 params_set = new_object.get_params(deep=False)
>
> AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\base.py
> in clone(estimator, safe)
> 47 # XXX: not handling dictionaries
> 48 if estimator_type in (list, tuple, set, frozenset):
> ---> 49 return estimator_type([clone(e, safe=safe) for e in estimator])
> 50 elif not hasattr(estimator, 'get_params'):
> 51 if not safe:
>
> AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\base.py
> in <listcomp>(.0)
> 47 # XXX: not handling dictionaries
> 48 if estimator_type in (list, tuple, set, frozenset):
> ---> 49 return estimator_type([clone(e, safe=safe) for e in estimator])
> 50 elif not hasattr(estimator, 'get_params'):
> 51 if not safe:
>
> AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\base.py
> in clone(estimator, safe)
> 47 # XXX: not handling dictionaries
> 48 if estimator_type in (list, tuple, set, frozenset):
> ---> 49 return estimator_type([clone(e, safe=safe) for e in estimator])
> 50 elif not hasattr(estimator, 'get_params'):
> 51 if not safe:
>
> AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\base.py
> in <listcomp>(.0)
> 47 # XXX: not handling dictionaries
> 48 if estimator_type in (list, tuple, set, frozenset):
> ---> 49 return estimator_type([clone(e, safe=safe) for e in estimator])
> 50 elif not hasattr(estimator, 'get_params'):
> 51 if not safe:
>
> AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\base.py
> in clone(estimator, safe)
> 59 new_object_params = estimator.get_params(deep=False)
> 60 for name, param in six.iteritems(new_object_params):
> ---> 61 new_object_params[name] = clone(param, safe=False)
> 62 new_object = klass(**new_object_params)
> 63 params_set = new_object.get_params(deep=False)
>
> AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\base.py
> in clone(estimator, safe)
> 50 elif not hasattr(estimator, 'get_params'):
> 51 if not safe:
> ---> 52 return copy.deepcopy(estimator)
> 53 else:
> 54 raise TypeError("Cannot clone object '%s' (type %s): "
>
> AppData\Local\Continuum\Anaconda3\lib\copy.py in
> deepcopy(x, memo, _nil)
> 167 reductor = getattr(x, "__reduce_ex__", None)
> 168 if reductor:
> --> 169 rv = reductor(4)
> 170 else:
> 171 reductor = getattr(x, "__reduce__", None)
>
> TypeError: can't pickle generator objects
How can it be solved? What can be due?
Update:
I've put:
list(generador_train = GroupKFold(n_splits=10).split(X_train, y_train, order_train))
but I got this error:
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-150-d0ca294b7811> in <module>()
25
26 grid = GridSearchCV(estimator=pipe, param_grid=C, cv=generador_train,scoring=my_scorer,refit=True) #Se declara el gridSearch con CV
---> 27 grid.fit(X_train, y_train) # Se ejecuta la pipeline
28 #grid.fit(digits.data, digits.target)
29 #res=pipe.named_steps['select'].grid_scores_ #Resultados gridSearch
~\Anaconda4\lib\site-packages\sklearn\model_selection\_search.py in fit(self, X, y, groups, **fit_params)
637 error_score=self.error_score)
638 for parameters, (train, test) in product(candidate_params,
--> 639 cv.split(X, y, groups)))
640
641 # if one choose to see train score, "out" will contain train score info
~\Anaconda4\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self, iterable)
777 # was dispatched. In particular this covers the edge
778 # case of Parallel used with an exhausted iterator.
--> 779 while self.dispatch_one_batch(iterator):
780 self._iterating = True
781 else:
~\Anaconda4\lib\site-packages\sklearn\externals\joblib\parallel.py in dispatch_one_batch(self, iterator)
623 return False
624 else:
--> 625 self._dispatch(tasks)
626 return True
627
~\Anaconda4\lib\site-packages\sklearn\externals\joblib\parallel.py in _dispatch(self, batch)
586 dispatch_timestamp = time.time()
587 cb = BatchCompletionCallBack(dispatch_timestamp, len(batch), self)
--> 588 job = self._backend.apply_async(batch, callback=cb)
589 self._jobs.append(job)
590
~\Anaconda4\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in apply_async(self, func, callback)
109 def apply_async(self, func, callback=None):
110 """Schedule a func to be run"""
--> 111 result = ImmediateResult(func)
112 if callback:
113 callback(result)
~\Anaconda4\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in __init__(self, batch)
330 # Don't delay the application, to avoid keeping the input
331 # arguments in memory
--> 332 self.results = batch()
333
334 def get(self):
~\Anaconda4\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self)
129
130 def __call__(self):
--> 131 return [func(*args, **kwargs) for func, args, kwargs in self.items]
132
133 def __len__(self):
~\Anaconda4\lib\site-packages\sklearn\externals\joblib\parallel.py in <listcomp>(.0)
129
130 def __call__(self):
--> 131 return [func(*args, **kwargs) for func, args, kwargs in self.items]
132
133 def __len__(self):
~\Anaconda4\lib\site-packages\sklearn\model_selection\_validation.py in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, error_score)
456 estimator.fit(X_train, **fit_params)
457 else:
--> 458 estimator.fit(X_train, y_train, **fit_params)
459
460 except Exception as e:
~\Anaconda4\lib\site-packages\sklearn\pipeline.py in fit(self, X, y, **fit_params)
246 This estimator
247 """
--> 248 Xt, fit_params = self._fit(X, y, **fit_params)
249 if self._final_estimator is not None:
250 self._final_estimator.fit(Xt, y, **fit_params)
~\Anaconda4\lib\site-packages\sklearn\pipeline.py in _fit(self, X, y, **fit_params)
211 Xt, fitted_transformer = fit_transform_one_cached(
212 cloned_transformer, None, Xt, y,
--> 213 **fit_params_steps[name])
214 # Replace the transformer of the step with the fitted
215 # transformer. This is necessary when loading the transformer
~\Anaconda4\lib\site-packages\sklearn\externals\joblib\memory.py in __call__(self, *args, **kwargs)
360
361 def __call__(self, *args, **kwargs):
--> 362 return self.func(*args, **kwargs)
363
364 def call_and_shelve(self, *args, **kwargs):
~\Anaconda4\lib\site-packages\sklearn\pipeline.py in _fit_transform_one(transformer, weight, X, y, **fit_params)
579 **fit_params):
580 if hasattr(transformer, 'fit_transform'):
--> 581 res = transformer.fit_transform(X, y, **fit_params)
582 else:
583 res = transformer.fit(X, y, **fit_params).transform(X)
~\Anaconda4\lib\site-packages\sklearn\base.py in fit_transform(self, X, y, **fit_params)
518 else:
519 # fit method of arity 2 (supervised transformation)
--> 520 return self.fit(X, y, **fit_params).transform(X)
521
522
~\Anaconda4\lib\site-packages\sklearn\feature_selection\rfe.py in fit(self, X, y)
434 scores = parallel(
435 func(rfe, self.estimator, X, y, train, test, scorer)
--> 436 for train, test in cv.split(X, y))
437
438 scores = np.sum(scores, axis=0)
~\Anaconda4\lib\site-packages\sklearn\feature_selection\rfe.py in <genexpr>(.0)
434 scores = parallel(
435 func(rfe, self.estimator, X, y, train, test, scorer)
--> 436 for train, test in cv.split(X, y))
437
438 scores = np.sum(scores, axis=0)
~\Anaconda4\lib\site-packages\sklearn\feature_selection\rfe.py in _rfe_single_fit(rfe, estimator, X, y, train, test, scorer)
26 Return the score for a fit across one fold.
27 """
---> 28 X_train, y_train = _safe_split(estimator, X, y, train)
29 X_test, y_test = _safe_split(estimator, X, y, test, train)
30 return rfe._fit(
~\Anaconda4\lib\site-packages\sklearn\utils\metaestimators.py in _safe_split(estimator, X, y, indices, train_indices)
198 X_subset = X[np.ix_(indices, train_indices)]
199 else:
--> 200 X_subset = safe_indexing(X, indices)
201
202 if y is not None:
~\Anaconda4\lib\site-packages\sklearn\utils\__init__.py in safe_indexing(X, indices)
158 indices.dtype.kind == 'i'):
159 # This is often substantially faster than X[indices]
--> 160 return X.take(indices, axis=0)
161 else:
162 return X[indices]
IndexError: index 182 is out of bounds for size 182