I am using this code to create a CSV file to extract lines of code containing 'Data,9,record' in the line with headers that correspond to the timestamp, latitude, longitude, distance, altitude, speed values in specific columns of data:
import csv
with open("assets/ride.csv", "r") as source:
lines = source.readlines()
with open("solution06.csv", "w") as new_file:
# Write header
new_file.write(','.join(('timestamp','latitude','longitude','distance','altitude','speed')))
new_file.write('\n')
# Iterate over all lines after original header
for line in lines[1:]:
line = line.split(',')
# Only record lines that start with "Data,<number>,record"
if line[0] == 'Data'and line[1] == '9' and line[2] == 'record':
# Join the desired data by commas and write as a new line
new_file.write(','.join(line[column - 1].strip('"') for column in (5, 8, 11, 14, 17, 20)))
new_file.write('\n')
with open("solution06.csv", "r") as source:
for line in source.readlines():
print(line.strip('\n').split(','))
I want to do the same exact thing above but using Pandas instead. Is there any way to replicate the Python code into a Pandas version of the commands?
Snippet of data from CSV file:
Type,Local Number,Message,Field 1,Value 1,Units 1,Field 2,Value 2,Units 2,Field 3,Value 3,Units 3,Field 4,Value 4,Units 4,Field 5,Value 5,Units 5,Field 6,Value 6,Units 6,Field 7,Value 7,Units 7,Field 8,Value 8,Units 8,Field 9,Value 9,Units 9,Field 10,Value 10,Units 10,Field 11,Value 11,Units 11,Field 12,Value 12,Units 12,Field 13,Value 13,Units 13,Field 14,Value 14,Units 14,Field 15,Value 15,Units 15,Field 16,Value 16,Units 16,Field 17,Value 17,Units 17,Field 18,Value 18,Units 18,Field 19,Value 19,Units 19,Field 20,Value 20,Units 20,Field 21,Value 21,Units 21,Field 22,Value 22,Units 22,Field 23,Value 23,Units 23,Field 24,Value 24,Units 24,Field 25,Value 25,Units 25,Field 26,Value 26,Units 26,Field 27,Value 27,Units 27,Field 28,Value 28,Units 28,Field 29,Value 29,Units 29,Field 30,Value 30,Units 30,Field 31,Value 31,Units 31,Field 32,Value 32,Units 32,Field 33,Value 33,Units 33,Field 34,Value 34,Units 34,Field 35,Value 35,Units 35,Field 36,Value 36,Units 36,Field 37,Value 37,Units 37,Field 38,Value 38,Units 38,Field 39,Value 39,Units 39,Field 40,Value 40,Units 40,Field 41,Value 41,Units 41,Field 42,Value 42,Units 42,Field 43,Value 43,Units 43,Field 44,Value 44,Units 44,Field 45,Value 45,Units 45,Field 46,Value 46,Units 46,Field 47,Value 47,Units 47,Field 48,Value 48,Units 48,Field 49,Value 49,Units 49,Field 50,Value 50,Units 50,Field 51,Value 51,Units 51,Field 52,Value 52,Units 52,Field 53,Value 53,Units 53,Field 54,Value 54,Units 54,Field 55,Value 55,Units 55,Field 56,Value 56,Units 56,Field 57,Value 57,Units 57,Field 58,Value 58,Units 58,Field 59,Value 59,Units 59,Field 60,Value 60,Units 60,Field 61,Value 61,Units 61,Field 62,Value 62,Units 62,Field 63,Value 63,Units 63,Field 64,Value 64,Units 64,Field 65,Value 65,Units 65,Field 66,Value 66,Units 66,Field 67,Value 67,Units 67,Field 68,Value 68,Units 68,Field 69,Value 69,Units 69,Field 70,Value 70,Units 70,Field 71,Value 71,Units 71,Field 72,Value 72,Units 72,Field 73,Value 73,Units 73,Field 74,Value 74,Units 74,Field 75,Value 75,Units 75,Field 76,Value 76,Units 76,Field 77,Value 77,Units 77,Field 78,Value 78,Units 78,Field 79,Value 79,Units 79,Field 80,Value 80,Units 80,Field 81,Value 81,Units 81,Field 82,Value 82,Units 82,
Definition,0,file_id,serial_number,1,,time_created,1,,unknown,1,,manufacturer,1,,product,1,,number,1,,type,1,,
Data,0,file_id,serial_number,"3938543757",,time_created,"896018500",,manufacturer,"1",,garmin_product,"1765",,type,"4",,,,,,,,
Definition,1,file_creator,software_version,1,,hardware_version,1,,,,,,,,,,,,,,,,,
Data,1,file_creator,software_version,"930",,,,,,,,,,,,,,,,,,,,
Definition,2,event,timestamp,1,,data,1,,event,1,,event_type,1,,event_group,1,,,,,,,,
Data,2,event,timestamp,"896018500",s,timer_trigger,"0",,event,"0",,event_type,"0",,event_group,"0",,,,,,,,
Definition,3,device_info,timestamp,1,,serial_number,1,,cum_operating_time,1,,unknown,1,,unknown,1,,unknown,1,,manufacturer,1,,product,1,,software_version,1,,battery_voltage,1,,ant_device_number,1,,device_index,1,,device_type,1,,hardware_version,1,,unknown,1,,battery_status,1,,ant_transmission_type,1,,ant_network,1,,unknown,1,,source_type,1,,
Data,3,device_info,timestamp,"896018500",s,serial_number,"3938543757",,manufacturer,"1",,garmin_product,"1765",,software_version,"9.3",,device_index,"0",,source_type,"5",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Data,3,device_info,timestamp,"896018500",s,manufacturer,"1",,garmin_product,"1765",,software_version,"9.3",,device_index,"1",,device_type,"4",,source_type,"5",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Definition,4,unknown,unknown,1,,unknown,1,,unknown,1,,unknown,1,,unknown,1,,unknown,1,,unknown,1,,unknown,1,,unknown,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Data,4,unknown,unknown,"896018500",,unknown,"1",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Definition,5,unknown,unknown,1,,unknown,1,,unknown,1,,unknown,1,,unknown,1,,unknown,1,,unknown,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Data,5,unknown,unknown,"896018500",,unknown,"895838400",,unknown,"896421600",,unknown,"1",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Definition,6,unknown,unknown,1,,unknown,1,,unknown,1,,unknown,1,,unknown,1,,unknown,1,,unknown,1,,unknown,1,,unknown,1,,unknown,1,,unknown,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Data,6,unknown,unknown,"896018500",,unknown,"10319",,unknown,"1000",,unknown,"0",,unknown,"133",,unknown,"51",,unknown,"190",,unknown,"1",,unknown,"40",,unknown,"188",,unknown,"1",,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Definition,7,sport,name,24,,unknown,1,,sport,1,,sub_sport,1,,unknown,1,,unknown,1,,unknown,3,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Data,7,sport,name,"Bike",,unknown,"61",,sport,"2",,sub_sport,"0",,unknown,"1",,unknown,"0",,unknown,"0|255|0",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Definition,8,record,timestamp,1,,distance,1,,altitude,1,,unknown,1,,unknown,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Data,8,record,timestamp,"896018500",s,distance,"0.0",m,altitude,"286.0",m,unknown,"3930",,unknown,"1002",,enhanced_altitude,"286.0",m,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Data,3,device_info,timestamp,"896018504",s,manufacturer,"1",,garmin_product,"1620",,software_version,"3.0",,device_index,"2",,device_type,"0",,source_type,"5",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Data,4,unknown,unknown,"896018504",,unknown,"2",,unknown,"2",,unknown,"1",,unknown,"0",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Definition,9,record,timestamp,1,,position_lat,1,,position_long,1,,distance,1,,altitude,1,,speed,1,,unknown,1,,unknown,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Data,9,record,timestamp,"896018545",s,position_lat,"504719750",semicircles,position_long,"-998493490",semicircles,distance,"10.87",m,altitude,"285.79999999999995",m,speed,"1.773",m/s,unknown,"3929",,unknown,"1002",,enhanced_altitude,"285.79999999999995",m,enhanced_speed,"1.773",m/s,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Data,9,record,timestamp,"896018560",s,position_lat,"504717676",semicircles,position_long,"-998501870",semicircles,distance,"71.85",m,altitude,"285.0",m,speed,"5.533",m/s,unknown,"3924",,unknown,"1001",,enhanced_altitude,"285.0",m,enhanced_speed,"5.533",m/s,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Data,9,record,timestamp,"896018566",s,position_lat,"504716354",semicircles,position_long,"-998506792",semicircles,distance,"108.02",m,altitude,"284.0",m,speed,"6.485",m/s,unknown,"3919",,unknown,"1001",,enhanced_altitude,"284.0",m,enhanced_speed,"6.485",m/s,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Data,9,record,timestamp,"896018575",s,position_lat,"504714055",semicircles,position_long,"-998515244",semicircles,distance,"170.23",m,altitude,"284.0",m,speed,"6.951",m/s,unknown,"3919",,unknown,"1001",,enhanced_altitude,"284.0",m,enhanced_speed,"6.951",m/s,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
The final outcome I need looks like this:
CodePudding user response:
I'm not at my computer, so I can't give you the exact code, but in general:
- This is exactly what pandas might typically be used for.
- You would use pandas.read_csv("filename") to bring all the data in to a dataframe; you could also specify which columns to bring in by passing a header.
- You could then filter that dataframe on the first three columns
- Print it to a new file.
Much easier than filtering on string commands! The pandas documentation will cover this use pretty well. https://pandas.pydata.org/pandas-docs/stable/getting_started/index.html#getting-started
CodePudding user response:
Here is your solution :
import pandas as pd
df = pd.read_csv("your-csv.csv", header=None)
new_df = df[(df[0]=="Data") & (df[1]==9) & (df[2]=="record")]
new_df = new_df[[4,7,10,13,16,19]]
new_df = new_df.rename(columns={
4: "timestamp",
7: "latitude",
10: "longitude",
13: "distance",
16: "altitude",
19: "speed"
})
new_df
You should get the following output :
timestamp latitude longitude distance altitude speed
8 896018545 504719750.0 -998493490.0 10.87 285.8 1.773