ValueError: Input X contains NaN-CodePudding

I'm trying to classify my network traffic using an SVM classifier, as shown below:

# Load the dataset from CSV into a DataFrame and display it.
import pandas as pd # used to load and process the dataset
import matplotlib.pyplot as plt 
ds= pd.read_csv("dataset_sdn.csv") # read the dataset into the DataFrame `ds`
# NOTE(review): fillna(0) returns a new DataFrame; the result is not assigned
# here, so `ds` itself is left unchanged by this line.
ds.fillna(0)
ds # display the DataFrame as the notebook cell output

ds output

# Select the model inputs and the target by column position.
X = ds.iloc[: , [4,5,6,7,8,9,10,11,12,13,14,17,18,19,20,21]]   # input features: columns 4-14 and 17-21
Y = ds.iloc[:, 22]  # output (target): column 22
print (X)
print (Y)

X output

Y output

from sklearn.model_selection import train_test_split
# Hold out 25% of the rows as a test set; the fixed random_state keeps the split repeatable.
X_Train, X_Test, Y_Train, Y_Test = train_test_split (X, Y, test_size=0.25, random_state=0)

from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
# Fit the scaler on the training split only, then reuse the same scaling for the test split.
X_Train = sc_X.fit_transform(X_Train)
X_Test = sc_X.transform(X_Test)

from sklearn.svm import SVC
# Build a linear-kernel support vector classifier.
classifier = SVC (kernel='linear', random_state=0)
# fit() validates X and raises ValueError if it contains NaN (this is the call
# that fails in the traceback below).
classifier.fit(X_Train, Y_Train)
# Predict labels for the held-out test rows.
Y_pred = classifier.predict(X_Test)

At this last step I get the following error message:

ValueError Traceback (most recent call last) Input In [43], in <cell line: 3>() 1 from sklearn.svm import SVC 2 classifier = SVC (kernel='linear', random_state=0) ----> 3 classifier.fit(X_Train, Y_Train) 5 # The output predect 6 Y_pred = classifier.predict(X_Test)

File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sklearn\svm\_base.py:173, in BaseLibSVM.fit(self, X, y, sample_weight) 171 check_consistent_length(X, y) 172 else: --> 173 X, y = self._validate_data( 174 X, 175 y, 176 dtype=np.float64, 177 order="C", 178 accept_sparse="csr", 179 accept_large_sparse=False, 180 ) 182 y = self._validate_targets(y) 184 sample_weight = np.asarray( 185 [] if sample_weight is None else sample_weight, dtype=np.float64 186 )

File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sklearn\base.py:596, in BaseEstimator._validate_data(self, X, y, reset, validate_separately, **check_params) 594 y = check_array(y, input_name="y", **check_y_params) 595 else: --> 596 X, y = check_X_y(X, y, **check_params) 597 out = X, y 599 if not no_val_X and check_params.get("ensure_2d", True):

File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sklearn\utils\validation.py:1074, in check_X_y(X, y, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, estimator) 1069 estimator_name = _check_estimator_name(estimator) 1070 raise ValueError( 1071 f"{estimator_name} requires y to be passed, but the target y is None" 1072 ) -> 1074 X = check_array( 1075 X, 1076 accept_sparse=accept_sparse, 1077
accept_large_sparse=accept_large_sparse, 1078 dtype=dtype,
1079 order=order, 1080 copy=copy, 1081
force_all_finite=force_all_finite, 1082 ensure_2d=ensure_2d,
1083 allow_nd=allow_nd, 1084
ensure_min_samples=ensure_min_samples, 1085
ensure_min_features=ensure_min_features, 1086
estimator=estimator, 1087 input_name="X", 1088 ) 1090 y = _check_y(y, multi_output=multi_output, y_numeric=y_numeric, estimator=estimator) 1092 check_consistent_length(X, y)

File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sklearn\utils\validation.py:899, in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator, input_name) 893 raise ValueError( 894 "Found array with dim %d. %s expected <= 2." 895 % (array.ndim, estimator_name) 896 ) 898 if force_all_finite: --> 899 _assert_all_finite( 900 array, 901 input_name=input_name, 902 estimator_name=estimator_name, 903 allow_nan=force_all_finite == "allow-nan", 904 ) 906 if ensure_min_samples > 0: 907 n_samples = _num_samples(array)

File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sklearn\utils\validation.py:146, in _assert_all_finite(X, allow_nan, msg_dtype, estimator_name, input_name) 124 if ( 125 not allow_nan 126 and estimator_name (...) 130 # Improve the error message on how to handle missing values in 131 # scikit-learn. 132 msg_err = ( 133 f"\n{estimator_name} does not accept missing values" 134 " encoded as NaN natively. For supervised learning, you might want" (...) 144 "#estimators-that-handle-nan-values" 145 ) --> 146 raise ValueError(msg_err) 148 # for object dtype data, we only check for NaNs (GH-13254) 149 elif X.dtype == np.dtype("object") and not allow_nan:

ValueError: Input X contains NaN. SVC does not accept missing values encoded as NaN natively. For supervised learning, you might want to consider sklearn.ensemble.HistGradientBoostingClassifier and Regressor which accept missing values encoded as NaNs natively. Alternatively, it is possible to preprocess the data, for instance by using an imputer transformer in a pipeline or drop samples with missing values. See https://scikit-learn.org/stable/modules/impute.html You can find a list of all estimators that handle NaN values at the following page: https://scikit-learn.org/stable/modules/impute.html#estimators-that-handle-nan-values

So, please, does anyone have advice on how to solve this error? As far as I can tell, there aren't any NaN values in the dataset.

CodePudding user response：

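You are calling `ds.fillna(0)` without keeping its result: `fillna` returns a new DataFrame and leaves `ds` unchanged, so the NaN values are still present when you train the model.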

Use either of the following:

# Option 1: assign the DataFrame returned by fillna back to `ds`.
ds = ds.fillna(0)

# Option 2 (alternative to option 1): fill the missing values in place.
ds.fillna(0, inplace=True)