I'm using PySpark in a Microsoft SQL Server using JDBC for connection.
query = """(
WITH table_1 AS (
SELECT
code_1,
a
FROM my_database_table_1
),
table_2 AS (
SELECT
code_2,
b
FROM my_database_table_2
)
SELECT
table_1.code_1 AS tb1_code_1,
table_2.code_2 AS tb2_code_2
FROM table_1
INNER JOIN table_2
ON table_1.code_1 == table_2.APRCH_CODIGO
) AS _
"""
df_python = spark.read.jdbc(url=jdbc_url, table=query, properties=properties)
I'm getting the following error:
com.microsoft.sqlserver.jdbc.SQLServerException: Incorrect syntax near the keyword 'WITH'.
Does anyone know why I'm getting such error?
CodePudding user response:
In sql the equality operator is = and not ==, you put == there in the JOIN, maybe this is the error
CodePudding user response:
In sql the equality operator is = and not ==, you put == there in the JOIN, maybe this is the error.
try with this code
from pyspark.sql import SparkSession
import os
driver = "/home/romerito/Documents/apache-spark-3.1.2/spark-3.1.2-bin-hadoop3.2/jars/mssql-jdbc-9.2.1.jre11.jar"
spark = (
SparkSession
.builder
.appName("load-sample-jdbc")
.master("local[2]")
.config("spark.driver.extraClassPath", driver)
.getOrCreate()
)
credentials = (
readCredentials(os.getcwd() f"/others/credentials-mssql.txt")
)
server = credentials['server']
port = credentials['port']
database = credentials['database']
user = credentials['user']
password = credentials['password']
connection = f"jdbc:sqlserver://{server}:{port};databaseName={database}"
query = """(
WITH table_1 AS (
SELECT
code_1,
a
FROM my_database_table_1
),
table_2 AS (
SELECT
code_2,
b
FROM my_database_table_2
),
SELECT
table_1.code_1 AS tb1_code_1,
table_2.code_2 AS tb2_code_2
FROM table_1
INNER JOIN table_2
ON table_1.code_1 = table_2.APRCH_CODIGO
) AS _
"""
query = spark.read \
.format('jdbc') \
.option('url', f'{connection}') \
.option('user', f'{user}') \
.option('password', f'{password}') \
.option('dbtable', f'{query}')
query.show()