How can I run Python code periodically in a Docker container? - CodePudding

I have a Python script that does data analytics on a CSV file, and I want it to run periodically in a Docker container. Every 15 seconds it should automatically check folder A; if there is a CSV file in it, it should process the file and put an HTML report with the same name in folder B.

Here is my Python code:

#This program pulls data from csv file and displays it as html file.

#csv file contains device names, card names and temperatures of cards

#The html file contains: how many devices and how many cards are in the system, which
#device has the highest-temperature card, and, in the table below, a per-device summary:
#how many cards it has, how many have a temperature of 70 or above, and the
#highest and average card temperatures.
#NOTE: The print functions in the program are written for trial purposes.

from enum import unique
from re import A, T
import pandas as pd

from prettytable import PrettyTable, PLAIN_COLUMNS
# Builds two PrettyTable reports from cards.csv (columns: Device, Card,
# Temperature; ';'-separated) and writes them to devices.html:
#   * table2 - overall summary (device count, card count, hottest card)
#   * table  - one row of statistics per device
# The print() calls are kept from the trial version of the script.

table = PrettyTable()  # per-device statistics
table2 = PrettyTable()  # overall summary

table.field_names = [
    "Device   -",
    "Total # of Cards  - ",
    "High Temp. Cards #  - ",
    "Max Temperature - ",
    "Avg. Temperature   ",
]
table2.field_names = [" ", "  "]

# No index_col is passed: the default integer index is kept so that rows can
# be addressed by position label (see the .loc[1] debug print below).
df = pd.read_csv("cards.csv", sep=';', usecols=['Device', 'Card', 'Temperature'])

print(type(df))
total_devices = df["Device"].nunique()  # number of distinct devices
print(total_devices, "\n\n")
print(total_devices)

# Debug print of the second row's device; skipped for files with one row,
# where label 1 does not exist.
if len(df) > 1:
    print(df["Device"].loc[1], "\n\n")

maxTemp = df['Temperature'].max()  # highest temperature over all cards
print(maxTemp, "\n\n")
print("total card     ", )

# Find the card with the max temperature and the device it belongs to.
# idxmax() returns the label of the first row holding the maximum.
hottest_row = df.loc[df["Temperature"].idxmax()]
deviceName = hottest_row["Device"]
cardName = hottest_row["Card"]
print(deviceName)
print(cardName)

print("\n\n")
# f-string rather than "+" so non-string card/device names still format.
newstr = f"{cardName}/{deviceName}"

# Build the summary table.
table2.add_row(["Total Devices         ", total_devices])
table2.add_row(["Total Cards           ", len(df["Card"])])
table2.add_row(["Max Card Temperature  ", maxTemp])
table2.add_row(["Hottest Card / Device ", newstr])

print(table2)

row_num = len(df)
print(row_num)

# Per-device statistics. sort=False keeps devices in order of first
# appearance in the file.
for device, group in df.groupby("Device", sort=False):
    temperatures = group["Temperature"]
    print("\n")
    print(device)
    for temperature in temperatures:
        print(temperature)

    cardCount = len(temperatures)  # cards belonging to this device
    count2 = int((temperatures >= 70).sum())  # cards at 70 degrees or above
    maxT = temperatures.max()
    avg = str(temperatures.mean())
    print("avg", avg)
    table.add_row([device, cardCount, count2, maxT, avg])

    print("num of cards", cardCount)
    print("high temp cards", count2)

print("\n\n")
print("\n\n")

print(table)

htmlCode = table.get_html_string()
htmlCode2 = table2.get_html_string()
# "with" guarantees the report is flushed and closed even if a write fails.
with open('devices.html', 'w') as f:
    f.write("SUMMARY")
    f.write(htmlCode2)
    f.write("DEVICES")
    f.write(htmlCode)

CodePudding user response：

Whether or not the code is run in Docker doesn't matter.

Wrap all of that current logic (well, not the imports and so on) in a function, say, def process_cards().
Call that function forever, in a loop:

import logging
import time


def process_cards():
    """Placeholder for one processing pass.

    The body is elided here (``...``); it stands for the script's existing
    logic, moved into a function so it can be called repeatedly.
    """
    table = PrettyTable()
    ...

def main(interval_seconds: float = 15):
    """Call process_cards() forever, sleeping between runs.

    Args:
        interval_seconds: Pause after each run, in seconds. The pause starts
            when a run finishes, so the period is run time plus this value.

    A failing run is logged with its traceback and does not stop the loop.
    """
    logging.basicConfig()
    while True:
        try:
            process_cards()
        except Exception:
            # Top-level boundary: record the failure and retry on the next
            # tick instead of letting one bad file kill the service.
            logging.exception("Failed processing")
        time.sleep(interval_seconds)


if __name__ == "__main__":
    main()

As an aside, your data processing code can be vastly simplified:

import pandas as pd

from prettytable import PrettyTable


def get_summary_table(df):
    """Build the overall summary table for a cards data frame.

    Args:
        df: Data frame with "Device", "Card" and "Temperature" columns.

    Returns:
        A PrettyTable with four label/value rows: device count, card count,
        maximum temperature, and the hottest card with its device.
    """
    device_count = df["Device"].nunique()
    # Row holding the first occurrence of the maximum temperature.
    hottest = df.loc[df["Temperature"].idxmax()]
    rows = [
        ["Total Devices", device_count],
        ["Total Cards", len(df["Card"])],
        ["Max Card Temperature", df["Temperature"].max()],
        ["Hottest Card / Device ", f"{hottest.Card}/{hottest.Device}"],
    ]

    summary_table = PrettyTable()
    for row in rows:
        summary_table.add_row(row)
    return summary_table


def get_devices_table(df, high_temp_threshold=70):
    """Build the per-device statistics table.

    Args:
        df: Data frame with "Device" and "Temperature" columns.
        high_temp_threshold: A card counts as "high temperature" when its
            temperature is greater than or equal to this value.

    Returns:
        A PrettyTable with one row per device: name, card count, number of
        high-temperature cards, maximum temperature and mean temperature.
    """
    devices_table = PrettyTable(
        [
            "Device",
            "Total # of Cards",
            "High Temp. Cards #",
            "Max Temperature",
            "Avg. Temperature",
        ]
    )
    for device_name, group in df.groupby("Device"):
        temperatures = group["Temperature"]
        count = len(group)
        avg_temp = temperatures.mean()
        max_temp = temperatures.max()
        # Summing the boolean mask counts the rows at or above the threshold.
        high_count = (temperatures >= high_temp_threshold).sum()
        print(f"{device_name=} {avg_temp=} {max_temp=} {high_count=}")
        devices_table.add_row([device_name, count, high_count, max_temp, avg_temp])
    return devices_table


def do_processing(csv_file="cards.csv", html_file="devices.html"):
    """Render the summary and per-device tables into an HTML report.

    Args:
        csv_file: Path of the input CSV. Currently unused: the function runs
            on built-in sample data; enable the read_csv line below (and
            drop the sample frame) to process a real file.
        html_file: Path of the HTML report to write.
    """
    # df = pd.read_csv(csv_file, sep=';', usecols=['Device', 'Card', 'Temperature'])
    # (Just some random example data)
    df = pd.DataFrame({
        "Device": [f"Device {1 + x // 3}" for x in range(10)],
        "Card": [f"Card {x + 1}" for x in range(10)],
        "Temperature": [59.3, 77.2, 48.5, 60.1, 77.2, 61.1, 77.4, 65.8, 71.2, 60.3],
    })
    summary_table = get_summary_table(df)
    devices_table = get_devices_table(df)

    with open(html_file, "w") as f:
        f.write(
            "<style>table, th, td {border: 1px solid black; border-collapse: collapse;}</style>"
        )
        f.write("SUMMARY")
        # The summary table has no meaningful column names, so hide its header.
        f.write(summary_table.get_html_string(header=False))
        f.write("DEVICES")
        f.write(devices_table.get_html_string())


do_processing()

CodePudding user response：

Here is an example of a "repeat" decorator that runs your function every given number of seconds.

I hope this sample helps you:

from typing import Optional, Callable, Awaitable
import asyncio
from functools import wraps



def repeat_every(*, seconds: float, wait_first: bool = False) -> Callable:
    """Return a decorator that schedules a function to run repeatedly.

    Calling the decorated function (inside a running event loop) starts a
    background task that invokes the original function, sleeps ``seconds``,
    and repeats until the task is cancelled or the function raises.

    Args:
        seconds: Delay between the end of one call and the start of the next.
        wait_first: If true, also wait ``seconds`` before the first call.

    Returns:
        A decorator. The decorated coroutine function takes no arguments and
        returns the ``asyncio.Task`` running the loop, so callers can await
        or cancel it. An exception raised by the function ends the loop and
        is stored on that task.
    """
    def decorator(function: Callable[[], Optional[Awaitable[None]]]):
        is_coroutine = asyncio.iscoroutinefunction(function)
        # The event loop keeps only weak references to tasks; hold strong
        # ones here so a running loop is not garbage-collected mid-flight.
        running_tasks = set()

        @wraps(function)
        async def wrapped():
            async def loop():
                if wait_first:
                    await asyncio.sleep(seconds)
                while True:
                    if is_coroutine:
                        await function()
                    else:
                        # Plain (blocking) callables run in the default
                        # executor so they do not stall the event loop.
                        await asyncio.get_running_loop().run_in_executor(
                            None, function
                        )
                    await asyncio.sleep(seconds)

            task = asyncio.create_task(loop())
            running_tasks.add(task)
            task.add_done_callback(running_tasks.discard)
            return task

        return wrapped
    print("Repeat every working well.")
    return decorator


@repeat_every(seconds=2)
async def main():
    """Demo job for the repeat_every decorator: print the product 2 * 2."""
    product = 2 * 2
    print(product)

async def _run_until_cancelled():
    """Start the repeating job, then keep the event loop alive.

    main() only schedules a background task and returns immediately. If it
    were passed to asyncio.run() directly, the loop would shut down (and
    cancel that task) before the job ever ran.
    """
    await main()
    await asyncio.Event().wait()  # never set: block until cancelled


# get_running_loop() raises RuntimeError when no loop is running in this
# thread, which is the normal case for a plain script.
try:
    loop = asyncio.get_running_loop()
except RuntimeError:
    loop = None

if loop is not None and loop.is_running():
    # Already inside an event loop (e.g. a notebook): schedule onto it.
    print('Async event loop already running.')
    tsk = loop.create_task(main())
    tsk.add_done_callback(
        lambda t: print(f'Task done with result= {t.result()}'))
else:
    print('Starting new event loop')
    asyncio.run(_run_until_cancelled())

Another option is to give the container an entrypoint that runs the script as a cron job.