I am trying to view the dataframe generated from a .xlsx file. When I view it with just print, it generates just fine in a Jupyter Notebook.
import seaborn as sns
import scienceplots
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import openpyxl
df = pd.read_excel('2022-Nov-14_Neut.xlsx', engine='openpyxl')
print(df)
When I try to print it with
pd.set_option('display.max_rows', True)
I get:
TypeError Traceback (most recent call last)
Cell In[14], line 9
6 import openpyxl
8 df = pd.read_excel('2022-Nov-14_Neut.xlsx', engine='openpyxl')
----> 9 print(df)
File ~/.local/lib/python3.10/site-packages/pandas/core/frame.py:1063, in DataFrame.__repr__(self)
1060 return buf.getvalue()
1062 repr_params = fmt.get_dataframe_repr_params()
-> 1063 return self.to_string(**repr_params)
File ~/.local/lib/python3.10/site-packages/pandas/core/frame.py:1226, in DataFrame.to_string(self, buf, columns, col_space, header, index, na_rep, formatters, float_format, sparsify, index_names, justify, max_rows, max_cols, show_dimensions, decimal, line_width, min_rows, max_colwidth, encoding)
1223 from pandas import option_context
1225 with option_context("display.max_colwidth", max_colwidth):
-> 1226 formatter = fmt.DataFrameFormatter(
1227 self,
1228 columns=columns,
1229 col_space=col_space,
1230 na_rep=na_rep,
1231 formatters=formatters,
1232 float_format=float_format,
1233 sparsify=sparsify,
1234 justify=justify,
1235 index_names=index_names,
1236 header=header,
1237 index=index,
1238 min_rows=min_rows,
1239 max_rows=max_rows,
1240 max_cols=max_cols,
1241 show_dimensions=show_dimensions,
1242 decimal=decimal,
1243 )
1244 return fmt.DataFrameRenderer(formatter).to_string(
1245 buf=buf,
1246 encoding=encoding,
1247 line_width=line_width,
1248 )
File ~/.local/lib/python3.10/site-packages/pandas/io/formats/format.py:610, in DataFrameFormatter.__init__(self, frame, columns, col_space, header, index, na_rep, formatters, justify, float_format, sparsify, index_names, max_rows, min_rows, max_cols, show_dimensions, decimal, bold_rows, escape)
607 self.max_rows_fitted = self._calc_max_rows_fitted()
609 self.tr_frame = self.frame
--> 610 self.truncate()
611 self.adj = get_adjustment()
File ~/.local/lib/python3.10/site-packages/pandas/io/formats/format.py:801, in DataFrameFormatter.truncate(self)
798 self._truncate_horizontally()
800 if self.is_truncated_vertically:
--> 801 self._truncate_vertically()
File ~/.local/lib/python3.10/site-packages/pandas/io/formats/format.py:844, in DataFrameFormatter._truncate_vertically(self)
842 else:
843 row_num = cast(int, self.max_rows)
--> 844 self.tr_frame = self.tr_frame.iloc[:row_num, :]
845 self.tr_row_num = row_num
File ~/.local/lib/python3.10/site-packages/pandas/core/indexing.py:1067, in _LocationIndexer.__getitem__(self, key)
1065 if self._is_scalar_access(key):
1066 return self.obj._get_value(*key, takeable=self._takeable)
-> 1067 return self._getitem_tuple(key)
1068 else:
1069 # we by definition only have the 0th axis
1070 axis = self.axis or 0
File ~/.local/lib/python3.10/site-packages/pandas/core/indexing.py:1567, in _iLocIndexer._getitem_tuple(self, tup)
1564 with suppress(IndexingError):
1565 return self._getitem_lowerdim(tup)
-> 1567 return self._getitem_tuple_same_dim(tup)
File ~/.local/lib/python3.10/site-packages/pandas/core/indexing.py:924, in _LocationIndexer._getitem_tuple_same_dim(self, tup)
921 if com.is_null_slice(key):
922 continue
--> 924 retval = getattr(retval, self.name)._getitem_axis(key, axis=i)
925 # We should never have retval.ndim < self.ndim, as that should
926 # be handled by the _getitem_lowerdim call above.
927 assert retval.ndim == self.ndim
File ~/.local/lib/python3.10/site-packages/pandas/core/indexing.py:1602, in _iLocIndexer._getitem_axis(self, key, axis)
1596 raise IndexError(
1597 "DataFrame indexer is not allowed for .iloc\n"
1598 "Consider using .loc for automatic alignment."
1599 )
1601 if isinstance(key, slice):
-> 1602 return self._get_slice_axis(key, axis=axis)
1604 if is_iterator(key):
1605 key = list(key)
File ~/.local/lib/python3.10/site-packages/pandas/core/indexing.py:1637, in _iLocIndexer._get_slice_axis(self, slice_obj, axis)
1634 return obj.copy(deep=False)
1636 labels = obj._get_axis(axis)
-> 1637 labels._validate_positional_slice(slice_obj)
1638 return self.obj._slice(slice_obj, axis=axis)
File ~/.local/lib/python3.10/site-packages/pandas/core/indexes/base.py:4213, in Index._validate_positional_slice(self, key)
4208 """
4209 For positional indexing, a slice must have either int or None
4210 for each of start, stop, and step.
4211 """
4212 self._validate_indexer("positional", key.start, "iloc")
-> 4213 self._validate_indexer("positional", key.stop, "iloc")
4214 self._validate_indexer("positional", key.step, "iloc")
File ~/.local/lib/python3.10/site-packages/pandas/core/indexes/base.py:6634, in Index._validate_indexer(self, form, key, kind)
6631 assert kind in ["getitem", "iloc"]
6633 if key is not None and not is_integer(key):
-> 6634 raise self._invalid_indexer(form, key)
TypeError: cannot do positional indexing on RangeIndex with these indexers [True] of type bool
I tried to first convert the .xlsx to .csv, but I got the same error upon trying to print the dataframe with all rows showing.
I expected to see a full view of the .xlsx.
I'm a newbie, so any help would be thoroughly appreciated.
Here are a few rows of the xlsx, if it helps:
2022-Der-08_plate1 | A | B1 | string | 20 | 4 |
---|---|---|---|---|---|
2022-Der-08_plate1 | A | B2 | string | 60 | 8 |
2022-Der-08_plate1 | A | B3 | string | 180 | 4 |
2022-Der-08_plate1 | A | B4 | string | 540 | 19 |
2022-Der-08_plate1 | A | B5 | string | 1620 | 32 |
2022-Der-08_plate1 | A | B6 | string | 4860 | 32 |
2022-Der-08_plate1 | A | B7 | string | 14580 | 60 |
2022-Der-08_plate1 | A | B8 | string | 43740 | 71 |
2022-Der-08_plate1 | A | B9 | string | 131220 | 80 |
2022-Der-08_plate1 | A | B10 | string | 393660 | 69 |
2022-Der-08_plate1 | A | B11 | string | 53 | |
2022-Der-08_plate1 | A | B12 | string | 1 | |
2022-Der-08_plate1 | A | C1 | string | 20 | 5 |
2022-Der-08_plate1 | A | C2 | string | 60 | 10 |
CodePudding user response:
The pandas library uses integer indices to select rows from a DataFrame.
It's not possible to use only boolean values.
To select rows from a DataFrame based on a boolean condition, you can use the DataFrame.loc indexer and pass it a boolean array that has the same length as the DataFrame.
Example:
df = pd.read_excel('2022-Nov-14_Neut.xlsx', engine='openpyxl')
df_selected = df.loc[df['column_name'] > 5]
CodePudding user response:
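The default value of display.max_rows is 60. If it is set to None, all rows of the data frame are shown. True is a bool, not a row count: pandas accepts it when the option is set, but later uses it as the stop of a positional slice (iloc[:True]) while truncating the output, which raises the TypeError shown above. So instead of True, put None:
pd.set_option('display.max_rows', None)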