I have a CSV file my_table.csv
that looks like the following:
"dt_start","my_int_value","my_double_value","dt_version"
"2022-01-02 00:00:00",2,2.2,"2022-01-02 00:00:00"
"2022-01-03 00:00:00",3,3.3,"2022-01-03 00:00:00"
Now I simply want to import this file into a table my_table
of my PostgreSQL database from Python using the instructions from the psycopg3
package (using psycopg==3.1.3
and psycopg-binary==3.1.3
).
My code looks as follows:
import os
import psycopg
table_name = "my_table"
conn = psycopg.connect(
dbname="MY_DB",
user="MY_USER",
password="MY_PW",
host="MY_HOST",
port="MY_PORT",
)
with conn:
with conn.cursor() as cur:
# create table
cur.execute(
f"""
CREATE TABLE IF NOT EXISTS {table_name} (
dt_start TIMESTAMP NOT NULL,
my_int_value INT NOT NULL,
my_double_value DOUBLE PRECISION NOT NULL,
dt_version TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY(dt_start, my_int_value, my_double_value, dt_version)
)
"""
)
# clear table
cur.execute(f"TRUNCATE {table_name}")
conn.commit()
# insert one row
cur.execute(
f"""INSERT INTO {table_name}"""
f""" (dt_start, my_int_value, my_double_value, dt_version)"""
f""" VALUES (%s, %s, %s, %s)""",
("2022-01-01 00:00:00", 1, 1.1, "2022-01-01 00:00:00"),
)
conn.commit()
# fetch it
cur.execute(f"""SELECT * FROM {table_name}""")
print(cur.fetchall())
# this breaks with "psycopg.errors.InvalidDatetimeFormat"
with open(f"""{table_name}.csv""", "r") as f:
with cur.copy(f"COPY {table_name} FROM STDIN") as copy:
while data := f.read(100):
copy.write(data)
conn.commit()
The first steps with some sample data work perfectly, but the CSV import breaks with an error such as:
psycopg.errors.InvalidDatetimeFormat: invalid syntax for type timestamp without time zone: »"dt_start","my_int_value","my_double_value","dt_version"«
CONTEXT: COPY my_table, Row 1, Column dt_start: »"dt_start","my_int_value","my_double_value","dt_version"«
Meanwhile, I have also tried different import variants from the docs and different datetime formats, but all result in the same error.
Any hints on how to fix this problem?
CodePudding user response:
create table import_test(dt_start timestamp, my_int_value integer, my_double_value float, dt_version timestamp);
cat import_test.csv
"dt_start","my_int_value","my_double_value","dt_version"
"2022-01-02 00:00:00",2,2.2,"2022-01-02 00:00:00"
"2022-01-03 00:00:00",3,3.3,"2022-01-03 00:00:00"
import psycopg
from psycopg import sql
con = psycopg.connect("dbname=test user=postgres host=localhost port=5432")
with open('import_test.csv') as f:
with cur.copy(sql.SQL('COPY {} FROM STDIN WITH(FORMAT CSV, HEADER)').format(sql.Identifier('import_test'))) as copy:
while data := f.read(100):
copy.write(data)
con.commit()
select * from import_test ;
dt_start | my_int_value | my_double_value | dt_version
--------------------- -------------- ----------------- ---------------------
2022-01-02 00:00:00 | 2 | 2.2 | 2022-01-02 00:00:00
2022-01-03 00:00:00 | 3 | 3.3 | 2022-01-03 00:00:00
CodePudding user response:
The file contains a header, tell Postgres about that:
COPY my_table FROM STDIN (FORMAT csv, HEADER true)
Check the syntax in the documentation.