I'd like to import the output of:
kubectl get pods -o json
into a Python pandas dataframe. The dataframe should also contain all containers together with their resource requests and limits.
My code starts as follows:
import json
import numpy as np
import pandas as pd
import os
pods_raw = os.popen('kubectl get pods -o json').read()
pods_json = json.loads(pods_raw)['items']
From here on I struggle to get the data into a dataframe in the right shape; in particular, 'spec.containers' should be split into separate rows when a pod has multiple containers.
CodePudding user response:
Here is an example of how you can extract the data of interest into a dataframe. The output is only an example (as you didn't specify the required output in the question):
import json
import pandas as pd

# load the JSON data from file (or use os.popen as in the question):
with open("data.json", "r") as f_in:
    data = json.load(f_in)

df = pd.DataFrame(data["items"])

# expand the nested "metadata" dicts into prefixed columns:
df = pd.concat(
    [df, df.pop("metadata").apply(pd.Series).add_prefix("meta_")], axis=1
)

# expand "spec":
df = pd.concat(
    [df, df.pop("spec").apply(pd.Series).add_prefix("spec_")], axis=1
)

# expand "status":
df = pd.concat(
    [df, df.pop("status").apply(pd.Series).add_prefix("status_")], axis=1
)

# keep only columns of interest:
df = df[["meta_name", "meta_namespace", "status_phase", "spec_containers"]]

# explode the spec_containers column (one row per container):
df = df.explode("spec_containers")
df = pd.concat(
    [
        df,
        df.pop("spec_containers")
        .apply(pd.Series)
        .add_prefix("spec_")[["spec_image", "spec_name"]],
    ],
    axis=1,
)

print(df)
Prints:
meta_name meta_namespace status_phase spec_image spec_name
0 apache-lb-648c5cb8cb-mw5zh default Running httpd apache
0 apache-lb-648c5cb8cb-mw5zh default Running index.docker.io/istio/proxyv2:1.13.4 istio-proxy
1 csi-cephfsplugin-fc79l default Running rocks.canonical.com:443/cdk/sig-storage/csi-node-driver-registrar:v2.0.1 driver-registrar
1 csi-cephfsplugin-fc79l default Running rocks.canonical.com:443/cdk/cephcsi/cephcsi:v3.3.1 csi-cephfsplugin
1 csi-cephfsplugin-fc79l default Running rocks.canonical.com:443/cdk/cephcsi/cephcsi:v3.3.1 liveness-prometheus
...and so on.
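An alternative worth mentioning: pandas' pd.json_normalize can flatten the nested pod documents and split out the containers in a single call, which avoids the repeated apply(pd.Series) steps. A minimal sketch, assuming the standard kubectl pod JSON layout (the column selection at the end is just an example):

import json
import os

import pandas as pd

pods = json.loads(os.popen("kubectl get pods -o json").read())

# one row per container; pod-level fields are repeated as meta columns
df = pd.json_normalize(
    pods["items"],
    record_path=["spec", "containers"],
    meta=[["metadata", "name"], ["metadata", "namespace"], ["status", "phase"]],
    record_prefix="container.",
    errors="ignore",
)

# resource requests/limits end up in columns such as
# "container.resources.requests.cpu" and "container.resources.limits.memory"
print(df.filter(regex="metadata|status|container.name|container.resources"))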
CodePudding user response:
Currently I have the following code to solve this:
#!/usr/bin/env python
import json
import os

import pandas as pd

# unit multipliers for Kubernetes binary memory quantities (Ki, Mi, Gi, Ti)
kb = 1024
mb = kb * kb
gb = mb * kb
tb = gb * kb


def main():
    pods_raw = os.popen('kubectl get pods -A -o json').read()
    pods_json = json.loads(pods_raw)['items']

    first_split = ['status', 'metadata', 'spec']
    second_split = ['spec.containers',
                    'spec.containers.resources',
                    'spec.containers.resources.limits',
                    'spec.containers.resources.requests']

    df_pods = pd.DataFrame.from_dict(pods_json)
    df_pods = concat_data(df_pods, first_split)
    df_pods = expand_data(df_pods, ['spec.containers'])
    df_pods = concat_data(df_pods, second_split)
    df_pods.index.name = 'index'

    # normalize the CPU/memory strings to numbers and sum them per pod
    col_to_normalize = ['spec.containers.resources.limits.cpu',
                        'spec.containers.resources.limits.memory',
                        'spec.containers.resources.requests.cpu',
                        'spec.containers.resources.requests.memory']
    for col_name in col_to_normalize:
        df_pods[col_name] = df_pods[col_name].map(normalize_values)
    df_pods[col_to_normalize] = df_pods.groupby('index')[col_to_normalize].sum()

    # keep one row per pod and only running pods
    df_pods = df_pods.drop_duplicates(['metadata.name'])
    df_pods = df_pods[df_pods['status.phase'] == 'Running']
    print(df_pods)


def concat_data(df: pd.DataFrame, expands: list) -> pd.DataFrame:
    # expand each dict column into separate, prefixed columns
    for expantion in expands:
        df = pd.concat(
            [df, df.pop(expantion).apply(pd.Series).add_prefix(f"{expantion}.")],
            axis=1)
    return df


def expand_data(df: pd.DataFrame, expands: list) -> pd.DataFrame:
    # expand each list column into one row per list element
    for expantion in expands:
        s = df[expantion].apply(pd.Series).stack()
        s.index = s.index.droplevel(-1)
        df.index = [x for x in df.index]
        del df[expantion]
        s.name = expantion
        df = df.join(s)
    return df


def normalize_values(val: str) -> float:
    # '500m' CPU -> cores; 'Ki'/'Mi'/'Gi'/'Ti' memory -> bytes; plain numbers pass through
    try:
        if val[-1] == 'm':
            return int(val[:-1]) / 1000
        if len(val) > 1:
            if val[-2].lower() == "k":
                return int(val[:-2]) * kb
            if val[-2].lower() == "m":
                return int(val[:-2]) * mb
            if val[-2].lower() == "g":
                return int(val[:-2]) * gb
            if val[-2].lower() == "t":
                return int(val[:-2]) * tb
        return int(val)
    except (TypeError, IndexError, ValueError):
        # missing values (NaN) or unrecognized quantities count as 0
        return 0


if __name__ == '__main__':
    main()
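For reference, with the helper above a few typical Kubernetes quantities come out as follows (illustrative values only):

print(normalize_values('250m'))   # CPU request '250m'  -> 0.25 (cores)
print(normalize_values('2'))      # CPU limit   '2'     -> 2 (cores)
print(normalize_values('128Mi'))  # memory      '128Mi' -> 134217728 (bytes)
print(normalize_values('1Gi'))    # memory      '1Gi'   -> 1073741824 (bytes)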
This works fine, except for the following FutureWarning, which I don't yet know how to resolve:
./resources.py:43: FutureWarning: The default dtype for empty Series will be 'object' instead of 'float64' in a future version. Specify a dtype explicitly to silence this warning.
df = pd.concat( [df, df.pop(expantion).apply(pd.Series).add_prefix(f"{expantion}.")], axis=1)
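The warning most likely comes from apply(pd.Series) being called on cells that hold an empty dict (e.g. containers with no resources set), which makes pandas construct an empty Series without an explicit dtype. One way to silence it while keeping the same approach is to give pd.Series an explicit dtype; a sketch of the adjusted helper (untested against your cluster data):

def concat_data(df: pd.DataFrame, expands: list) -> pd.DataFrame:
    for expantion in expands:
        expanded = (
            df.pop(expantion)
            # explicit dtype avoids the "default dtype for empty Series" warning
            .apply(lambda cell: pd.Series(cell, dtype="object"))
            .add_prefix(f"{expantion}.")
        )
        df = pd.concat([df, expanded], axis=1)
    return df

Alternatively, flattening with pd.json_normalize (as in the sketch in the first answer) avoids building the intermediate Series objects altogether.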