I am trying to rename the DataFrame columns using the code below:
# Intended to rename every column of `df` in place by passing each current
# column name through `_clean_col_name`. Prints each name as it is visited
# and returns `df`.
function _process_col(df)
for col in names(df)
print(col)
# NOTE(review): `:col` is the literal Symbol named "col", not the value held
# by the loop variable `col`, so this asks to rename a column literally
# called "col".
rename!(df, :col => _clean_col_name(col))
end
return df
end
But it throws an error saying that col is not present in the DataFrame. rename!(df, :col => _clean_col_name(col))
is treating col as a literal column name, not as a variable.
Note: _clean_col_name(col)
is a custom function that processes the column name.
Is there an alternative way to do this?
CodePudding user response:
If you want to apply _clean_col_name
to all columns then use the following form:
julia> using DataFrames
julia> df = DataFrame(rand(3, 5), :auto)
3×5 DataFrame
Row │ x1 x2 x3 x4 x5
│ Float64 Float64 Float64 Float64 Float64
─────┼────────────────────────────────────────────────────
1 │ 0.0856504 0.677317 0.8402 0.630016 0.815347
2 │ 0.584487 0.997837 0.252574 0.659241 0.0699587
3 │ 0.196169 0.488646 0.689678 0.554855 0.321897
julia> _clean_col_name(x) = uppercase(x)
_clean_col_name (generic function with 1 method)
julia> rename!(_clean_col_name, df)
3×5 DataFrame
Row │ X1 X2 X3 X4 X5
│ Float64 Float64 Float64 Float64 Float64
─────┼────────────────────────────────────────────────────
1 │ 0.0856504 0.677317 0.8402 0.630016 0.815347
2 │ 0.584487 0.997837 0.252574 0.659241 0.0699587
3 │ 0.196169 0.488646 0.689678 0.554855 0.321897
If you want to stick with your function, just remove the :
in front of col
(:col is the literal Symbol named col, whereas col without the colon is the loop variable), just as @BatWannaBe suggested:
julia> function _process_col(df)
# Rename each column of `df` in place to `_clean_col_name(col)` and return `df`.
for col in names(df)
# `print` adds no newline, so the visited names run together in the output.
print(col)
# `col` without a leading `:` is the loop variable, so the pair maps the
# current column name to its cleaned name.
rename!(df, col => _clean_col_name(col))
end
return df
end
_process_col (generic function with 1 method)
julia> df = DataFrame(rand(3, 5), :auto)
3×5 DataFrame
Row │ x1 x2 x3 x4 x5
│ Float64 Float64 Float64 Float64 Float64
─────┼───────────────────────────────────────────────────
1 │ 0.445679 0.0197894 0.605917 0.668544 0.979025
2 │ 0.631891 0.185474 0.136334 0.218718 0.365156
3 │ 0.115752 0.308683 0.273192 0.638987 0.195281
julia> _process_col(df)
x1x2x3x4x53×5 DataFrame
Row │ X1 X2 X3 X4 X5
│ Float64 Float64 Float64 Float64 Float64
─────┼───────────────────────────────────────────────────
1 │ 0.445679 0.0197894 0.605917 0.668544 0.979025
2 │ 0.631891 0.185474 0.136334 0.218718 0.365156
3 │ 0.115752 0.308683 0.273192 0.638987 0.195281
Please check the docstring of rename!
to see the other available options (there are several more). To give just one example:
julia> df = DataFrame(rand(3, 5), :auto)
3×5 DataFrame
Row │ x1 x2 x3 x4 x5
│ Float64 Float64 Float64 Float64 Float64
─────┼────────────────────────────────────────────────────
1 │ 0.242173 0.0401673 0.674665 0.27598 0.338189
2 │ 0.0497058 0.958139 0.707002 0.258894 0.623699
3 │ 0.477812 0.5068 0.584878 0.198547 0.713736
julia> rename!(df, (names(df) .=> _clean_col_name.(names(df)))...)
3×5 DataFrame
Row │ X1 X2 X3 X4 X5
│ Float64 Float64 Float64 Float64 Float64
─────┼────────────────────────────────────────────────────
1 │ 0.242173 0.0401673 0.674665 0.27598 0.338189
2 │ 0.0497058 0.958139 0.707002 0.258894 0.623699
3 │ 0.477812 0.5068 0.584878 0.198547 0.713736
CodePudding user response:
"""
    _process_col(df)

Rename every column of `df` in place to the result of applying `_clean_col_name`
to its current name, then return `df`.
"""
function _process_col(df)
    # Build the new names in one pass with a concrete element type, instead of
    # pushing into an untyped `[]` (a `Vector{Any}`) and converting afterwards.
    array = Symbol[Symbol(_clean_col_name(col)) for col in names(df)]
    # The vector form of `rename!` assigns the new names positionally.
    rename!(df, array)
    return df
end
This works in your case; here array is the list of new names for your columns.