I used pip install pyspark in a python enviroment, java is installed but when i try to initialise a spark session I get a java error Java gateway process exited before sending its port number
spark = SparkSession \
.builder \
.appName("CustomerChurn") \
.master("local") \
.config() \
.getOrCreate()
RuntimeError Traceback (most recent call last)
Input In [3], in <cell line: 3>()
1 findspark.init()
3 spark = SparkSession \
4 .builder \
5 .appName("CustomerChurn") \
6 .master("local") \
7 .config() \
----> 8 .getOrCreate()
File ~\anaconda3\envs\CustomerChurnProject\lib\site-packages\pyspark\sql\session.py:269, in SparkSession.Builder.getOrCreate(self)
267 sparkConf.set(key, value)
268 # This SparkContext may be an existing one.
--> 269 sc = SparkContext.getOrCreate(sparkConf)
270 # Do not update `SparkConf` for existing `SparkContext`, as it's shared
271 # by all sessions.
272 session = SparkSession(sc, options=self._options)
File ~\anaconda3\envs\CustomerChurnProject\lib\site-packages\pyspark\context.py:483, in SparkContext.getOrCreate(cls, conf)
481 with SparkContext._lock:
482 if SparkContext._active_spark_context is None:
--> 483 SparkContext(conf=conf or SparkConf())
484 assert SparkContext._active_spark_context is not None
485 return SparkContext._active_spark_context
File ~\anaconda3\envs\CustomerChurnProject\lib\site-packages\pyspark\context.py:195, in SparkContext.__init__(self, master, appName, sparkHome, pyFiles, environment, batchSize, serializer, conf, gateway, jsc, profiler_cls, udf_profiler_cls)
189 if gateway is not None and gateway.gateway_parameters.auth_token is None:
190 raise ValueError(
191 "You are trying to pass an insecure Py4j gateway to Spark. This"
192 " is not allowed as it is a security risk."
193 )
--> 195 SparkContext._ensure_initialized(self, gateway=gateway, conf=conf)
196 try:
197 self._do_init(
198 master,
199 appName,
(...)
208 udf_profiler_cls,
209 )
File ~\anaconda3\envs\CustomerChurnProject\lib\site-packages\pyspark\context.py:417, in SparkContext._ensure_initialized(cls, instance, gateway, conf)
415 with SparkContext._lock:
416 if not SparkContext._gateway:
--> 417 SparkContext._gateway = gateway or launch_gateway(conf)
418 SparkContext._jvm = SparkContext._gateway.jvm
420 if instance:
File ~\anaconda3\envs\CustomerChurnProject\lib\site-packages\pyspark\java_gateway.py:106, in launch_gateway(conf, popen_kwargs)
103 time.sleep(0.1)
105 if not os.path.isfile(conn_info_file):
--> 106 raise RuntimeError("Java gateway process exited before sending its port number")
108 with open(conn_info_file, "rb") as info:
109 gateway_port = read_int(info)
RuntimeError: Java gateway process exited before sending its port number
the run time error is posted above, I have not seen this type of error in other posts
CodePudding user response:
Based on your error logs, I think you need to specify the $JAVA_HOME variable on your system.
This link may help:
See this: https://confluence.atlassian.com/doc/setting-the-java_home-variable-in-windows-8895.html