BUILD-VERSION: Commit 0abe49552a6fcdf6a5aca565d624cda255342c33 on branch 0.6.2-KNIME_cdh-5.13 built on 2017-12-06 18:06:16+01:00 [2018-03-28 15:03:08,429] INFO spark.jobserver.JobServer$ [] [] - Starting JobServer with config { # system properties "ui" : { # system properties "enabled" : "true", # system properties "killEnabled" : "true" }, # system properties "app" : { # system properties "name" : "spark.jobserver.JobServer" }, # application.conf: 5 # spark web UI port "webUrlPort" : 8080, # system properties "submit" : { # system properties "deployMode" : "client" }, # system properties "serializer" : "org.apache.spark.serializer.KryoSerializer", # system properties "lineage" : { # system properties "enabled" : "false", # system properties "log" : { # system properties "dir" : "/var/log/spark/lineage" } }, # system properties "executor" : { # system properties "extraLibraryPath" : "/usr/lib/hadoop/lib/native", # system properties "uri" : "", # system properties "extraJavaOptions" : "-Dlog4j.configuration=file:/opt/spark-job-server/log4j-server.properties -DLOG_DIR=/var/log/spark-job-server" }, # merge of /opt/spark-job-server/environment.conf: 25,application.conf: 7 # ######################################################################### # Job server settings # ######################################################################### "jobserver" : { # /opt/spark-job-server/environment.conf: 27 # TCP port that the job server listens on for HTTP requests "port" : 8090, # merge of /opt/spark-job-server/environment.conf: 37,application.conf: 40 "sqldao" : { # /opt/spark-job-server/environment.conf: 38 "rootdir" : "/tmp/spark-job-server/sqldao/data", # application.conf: 42 # Slick database driver, full classpath "slick-driver" : "scala.slick.driver.H2Driver", # /opt/spark-job-server/environment.conf: 41 "dbcp" : { # /opt/spark-job-server/environment.conf: 45 "maxidle" : 10, # /opt/spark-job-server/environment.conf: 43 "maxactive" : 20, # /opt/spark-job-server/environment.conf: 42 "connectonstart" : true, # /opt/spark-job-server/environment.conf: 44 "minidle" : 1, # /opt/spark-job-server/environment.conf: 46 "initialsize" : 10 }, # application.conf: 45 # JDBC driver, full classpath "jdbc-driver" : "org.h2.Driver", # merge of /opt/spark-job-server/environment.conf: 39,application.conf: 52 # Full JDBC URL / init string, along with username and password. Sorry, needs to match above. # Substitutions may be used to launch job-server, but leave it out here in the default or tests won't pass "jdbc" : { # application.conf: 55 "password" : "", # application.conf: 54 "user" : "", # /opt/spark-job-server/environment.conf: 39 "url" : "jdbc:h2:/tmp/spark-job-server/sqldao/data/h2-db;AUTO_SERVER=TRUE;AUTO_RECONNECT=TRUE" } }, # application.conf: 87 # spark broadcst factory in yarn deployment # Versions prior to 1.1.0, spark default broadcast factory is org.apache.spark.broadcast.HttpBroadcastFactory. # Can't start multiple sparkContexts in the same JVM with HttpBroadcastFactory. "yarn-broadcast-factory" : "org.apache.spark.broadcast.TorrentBroadcastFactory", # /opt/spark-job-server/environment.conf: 52 # Storage directory for files that are uploaded to the server # via POST/data commands "datadao" : { # /opt/spark-job-server/environment.conf: 53 "rootdir" : "/tmp/spark-job-server/upload" }, # application.conf: 9 "bind-address" : "0.0.0.0", # /opt/spark-job-server/environment.conf: 65 # Timeouts for Spark context creation in seconds. 
In "yarn-client" mode, use # the first option, in all other modes, use the second one. If context # creation takes longer than the timeout, the jobserver closes the context. # Especially in yarn-client mode, context creation includes container # allocation, which can take a while. "yarn-context-creation-timeout" : "60 s", # /opt/spark-job-server/environment.conf: 66 "context-creation-timeout" : "60 s", # application.conf: 69 # The ask pattern timeout for Api "short-timeout" : "3 s", # /opt/spark-job-server/environment.conf: 58 # Number of jobs that can be run simultaneously per context. If not set, # defaults to number of cores on machine where Spark job server is running. "max-jobs-per-context" : 100, # /opt/spark-job-server/environment.conf: 36 # Class to use to persist data such as jars, applications, jobs, etc. # spark.jobserver.io.JobFileDAO uses the file system for persistence # spark.jobserver.io.JobSqlDAO uses an SQL database for persistence # "jobdao" : "spark.jobserver.io.JobSqlDAO", # /opt/spark-job-server/environment.conf: 30 # Directory where the job server stores uploaded jar files "jar-store-rootdir" : "/tmp/spark-job-server/jars", # application.conf: 24 "filedao" : { # application.conf: 25 "rootdir" : "/tmp/spark-jobserver/filedao/data" }, # application.conf: 15 # Number of job results to keep per JobResultActor/context "job-result-cache-size" : 5000, # /opt/spark-job-server/environment.conf: 71 # If true, a separate JVM is forked for each Spark context. # KERBEROS NOTE: In a Kerberos-enabled environment, you should set this to true # (as well as shiro authentication, see later in this file). "context-per-jvm" : true, # application.conf: 75 # Time out for job server to wait while creating named objects "named-object-creation-timeout" : "60 s" }, # merge of /opt/spark-job-server/environment.conf: 90,application.conf: 103 # Default settings for Spark contexts. These settings can be overridden on a # per-context basis. Please consult the Spark documentation for more details # on available settings. # Default settings for ad hoc as well as manually created contexts # You can add any Spark config params here, for example, spark.mesos.coarse = true "context-settings" : { # application.conf: 111 "streaming" : { # application.conf: 116 # if true, stops gracefully by waiting for the processing of all received data to be completed "stopGracefully" : true, # application.conf: 120 # if true, stops the SparkContext with the StreamingContext. The underlying SparkContext will be # stopped regardless of whether the StreamingContext has been started. "stopSparkContext" : true, # application.conf: 113 # Default batch interval for Spark Streaming contexts in milliseconds "batch_interval" : 1000 }, # application.conf: 127 # Timeout for SupervisorActor to wait for forked (separate JVM) contexts to initialize "context-init-timeout" : "60s", # application.conf: 109 # A zero-arg class implementing spark.jobserver.context.SparkContextFactory # Determines the type of jobs that can run in a SparkContext "context-factory" : "spark.jobserver.context.DefaultSparkContextFactory", # application.conf: 129 "passthrough" : { # application.conf: 130 "spark" : { # application.conf: 130 "driver" : { # application.conf: 130 "allowMultipleContexts" : true } } }, # /opt/spark-job-server/environment.conf: 106 # Required setting, that sets "spark.executor.memory" in the SparkConf. Sets # the amount of memory to use per Spark executor process, in the same format # as JVM memory strings (e.g. 512m, 2g). 
# # Note: In "yarn-client" mode, Spark will request this amount of memory per YARN # container, plus some additional overhead. "memory-per-node" : "1G", # /opt/spark-job-server/environment.conf: 98 # Required setting, that sets "spark.cores.max" in the SparkConf. This sets # the maximum amount of CPU cores to request for the Spark on the cluster # (not from each machine). # # IMPORTANT: Note that although required by job-server, this setting only # has an effect in Standalone and Mesos clusters. "num-cpu-cores" : 2 }, # /opt/spark-job-server/environment.conf: 82 # Sets the "spark.master" property in the SparkConf. KNIME recommends # "yarn-client" for production use and "local[4]" for debugging purposes. # master = "local[4]" # Run Spark locally with 4 worker threads "master" : "yarn-client", # system properties "dynamicAllocation" : { # system properties "enabled" : "true", # system properties "minExecutors" : "0", # system properties "executorIdleTimeout" : "60", # system properties "schedulerBacklogTimeout" : "1" }, # system properties "authenticate" : { # system properties "enableSaslEncryption" : "false" }, # system properties "sql" : { # system properties "queryExecutionListeners" : "com.cloudera.spark.lineage.ClouderaNavigatorListener" }, # system properties "shuffle" : { # system properties "encryption" : { # system properties "enabled" : "false" }, # system properties "service" : { # system properties "port" : "7337", # system properties "enabled" : "true" } }, # system properties "eventLog" : { # system properties "enabled" : "true", # system properties "dir" : "hdfs://ip-172-31-34-104.us-west-2.compute.internal:8020/user/spark/applicationHistory" }, # system properties "driver" : { # system properties "extraLibraryPath" : "/usr/lib/hadoop/lib/native", # system properties "extraJavaOptions" : "-XX:+UseConcMarkSweepGC\n -verbose:gc -XX:+PrintGCTimeStamps -Xloggc:/opt/spark-job-server/gc.out\n -XX:MaxPermSize=512m\n -XX:+CMSClassUnloadingEnabled -XX:MaxDirectMemorySize=512M -XX:+HeapDumpOnOutOfMemoryError -Djava.net.preferIPv4Stack=true -Dlog4j.configuration=file:/opt/spark-job-server/log4j-server.properties -DLOG_DIR=/var/log/spark-job-server -Dspark.executor.uri= ", # system properties "memory" : "2G" }, # application.conf: 93 # predefined Spark contexts # Below is an example, but do not uncomment it. Everything defined here is carried over to # deploy-time configs, so they will be created in all environments. :( "contexts" : {}, # system properties "extraListeners" : "com.cloudera.spark.lineage.ClouderaNavigatorListener", # system properties "yarn" : { # system properties "am" : { # system properties "extraLibraryPath" : "/usr/lib/hadoop/lib/native" }, # system properties "jar" : "local:/usr/lib/spark/lib/spark-assembly.jar", # system properties "historyServer" : { # system properties "allowTracking" : "true", # system properties "address" : "http://ip-172-31-34-104.us-west-2.compute.internal:18088" } }, # system properties "jars" : "file:/opt/spark-job-server/spark-job-server.jar" } [2018-03-28 15:03:08,432] INFO spark.jobserver.JobServer$ [] [] - Spray config: { # reference.conf: 1478 # After a `Timedout` message has been sent to the timeout handler and the # request still hasn't been completed after the time period set here # the server will complete the request itself with an error response. # Set to `infinite` to disable timeout timeouts. 
"timeout-timeout" : "2 s", # application.conf: 187 # ssl engine provider protocols "enabledProtocols" : [ # application.conf: 187 "SSLv3", # application.conf: 187 "TLSv1" ], # application.conf: 190 "pipelining-limit" : 2, # reference.conf: 1575 # The time period within which a connection handler must have been # registered after the bind handler has received a `Connected` event. # Set to `infinite` to disable. "registration-timeout" : "1s", # /opt/spark-job-server/environment.conf: 154 # parsing { # max-content-length = 200m # } "request-timeout" : "60 s", # reference.conf: 1600 "back-pressure" : { # reference.conf: 1604 # The reciprocal rate of requested Acks per NoAcks. E.g. the default value # '10' means that every 10th write request is acknowledged. This affects the # number of writes each connection has to buffer even in absence of back-pressure. "noack-rate" : 10, # reference.conf: 1609 # The lower limit the write queue size has to shrink to before reads are resumed. # Use 'infinite' to disable the low-watermark so that reading is resumed instantly # after the next successful write. "reading-low-watermark" : "infinite" }, # reference.conf: 1570 # The time period within which the TCP unbinding process must be completed. # Set to `infinite` to disable. "unbind-timeout" : "1s", # reference.conf: 1500 # Enables/disables support for statistics collection and querying. # Even though stats keeping overhead is small, # for maximum performance switch off when not needed. "stats-support" : "on", # reference.conf: 1490 # The path of the actor to send `spray.http.Timedout` messages to. # If empty all `Timedout` messages will go to the "regular" request # handling actor. "timeout-handler" : "", # /opt/spark-job-server/environment.conf: 155 "idle-timeout" : "120 s", # /opt/spark-job-server/environment.conf: 156 "request-chunk-aggregation-limit" : "200m", # reference.conf: 1449 # Enables/disables SSL encryption. # If enabled the server uses the implicit `ServerSSLEngineProvider` member # of the `Bind` command to create `SSLEngine` instances for the underlying # IO connection. "ssl-encryption" : "off", # application.conf: 192 # for maximum performance (prevents StopReading / ResumeReading messages to the IOBridge) # Needed for HTTP/1.0 requests with missing Host headers "default-host-header" : "spray.io:8765", # reference.conf: 1504 # Enables/disables the addition of a `Remote-Address` header # holding the clients (remote) IP address. "remote-address-header" : "off", # reference.conf: 1584 # The time after which a connection is aborted (RST) after a parsing error # occurred. The timeout prevents a connection which is already known to be # erroneous from receiving evermore data even if all of the data will be ignored. # However, in case of a connection abortion the client usually doesn't properly # receive the error response. This timeout is a trade-off which allows the client # some time to finish its request and receive a proper error response before the # connection is forcibly closed to free resources. "parsing-error-abort-timeout" : "2s", # merge of application.conf: 195,reference.conf: 1750 # The (default) configuration of the HTTP message parser for the server and # the client. # IMPORTANT: These settings (i.e. children of `spray.can.parsing`) can't be directly # overridden in `application.conf` to change the parser settings for client and server # altogether (see https://github.com/spray/spray/issues/346). 
Instead, override the # concrete settings beneath `spray.can.server.parsing` and `spray.can.client.parsing` # where these settings are copied to. "parsing" : { # reference.conf: 1758 "max-chunk-ext-length" : 256, # reference.conf: 1759 "max-chunk-size" : "1m", # application.conf: 195 # Increase this in order to upload bigger job jars "max-content-length" : "30m", # reference.conf: 1756 "max-header-count" : 64, # reference.conf: 1773 # Sets the strictness mode for parsing request target URIs. # The following values are defined: # # `strict`: RFC3986-compliant URIs are required, # a 400 response is triggered on violations # # `relaxed`: all visible 7-Bit ASCII chars are allowed # # `relaxed-with-raw-query`: like `relaxed` but additionally # the URI query is not parsed, but delivered as one raw string # as the `key` value of a single Query structure element. # "uri-parsing-mode" : "strict", # reference.conf: 1753 "max-response-reason-length" : 64, # reference.conf: 1783 # Enables/disables the logging of warning messages in case an incoming # message (request or response) contains an HTTP header which cannot be # parsed into its high-level model class due to incompatible syntax. # Note that, independently of this settings, spray will accept messages # with such headers as long as the message as a whole would still be legal # under the HTTP specification even without this header. # If a header cannot be parsed into a high-level model instance it will be # provided as a `RawHeader`. "illegal-header-warnings" : "on", # reference.conf: 1755 "max-header-value-length" : "8k", # reference.conf: 1787 # limits for the number of different values per header type that the # header cache will hold "header-cache" : { # reference.conf: 1792 "If-Modified-Since" : 0, # reference.conf: 1794 "If-Range" : 0, # reference.conf: 1788 "default" : 12, # reference.conf: 1790 "Date" : 0, # reference.conf: 1789 "Content-MD5" : 0, # reference.conf: 1796 "User-Agent" : 32, # reference.conf: 1795 "If-Unmodified-Since" : 0, # reference.conf: 1793 "If-None-Match" : 0, # reference.conf: 1791 "If-Match" : 0 }, # reference.conf: 1752 # The limits for the various parts of the HTTP message parser. "max-uri-length" : "2k", # reference.conf: 1754 "max-header-name-length" : 64, # reference.conf: 1807 # Enables/disables inclusion of an SSL-Session-Info header in parsed # messages over SSL transports (i.e., HttpRequest on server side and # HttpResponse on client side). "ssl-session-info-header" : "off", # reference.conf: 1802 # Sets the size starting from which incoming http-messages will be delivered # in chunks regardless of whether chunking is actually used on the wire. # Set to infinite to disable auto chunking. "incoming-auto-chunking-threshold-size" : "infinite" }, # reference.conf: 1566 # The time period within which the TCP binding process must be completed. # Set to `infinite` to disable. "bind-timeout" : "1s", # reference.conf: 1536 # Enables/disables the returning of more detailed error messages to # the client in the error response. # Should be disabled for browser-facing APIs due to the risk of XSS attacks # and (probably) enabled for internal or non-browser APIs. # Note that spray will always produce log messages containing the full # error details. "verbose-error-messages" : "off", # reference.conf: 1516 # Enables/disables automatic handling of HEAD requests. 
# If this setting is enabled the server dispatches HEAD requests as GET # requests to the application and automatically strips off all message # bodies from outgoing responses. # Note that, even when this setting is off the server will never send # out message bodies on responses to HEAD requests. "transparent-head-requests" : "on", # application.conf: 183 "keystoreType" : "JKS", # reference.conf: 1541 # Enables/disables the logging of the full (potentially multiple line) # error message to the server logs. # If disabled only a single line will be logged. "verbose-error-logging" : "off", # reference.conf: 1598 # Enables/disables automatic back-pressure handling by write buffering and # receive throttling "automatic-back-pressure-handling" : "on", # reference.conf: 1443-2058 # Always contains the deployed version of spray. # Referenced, for example, from the `spray.can.server.server-header` setting. "server-header" : "spray-can/1.2.3", # application.conf: 185 # key manager factory provider "provider" : "SunX509", # reference.conf: 1613 # Enables more verbose DEBUG logging for debugging SSL related issues. "ssl-tracing" : "off", # reference.conf: 1562 # For HTTPS connections this setting specified the maximum number of # bytes that are encrypted in one go. Large responses are broken down in # chunks of this size so as to already begin sending before the response has # been encrypted entirely. "max-encryption-chunk-size" : "1m", # reference.conf: 1508 # Enables/disables the addition of a `Raw-Request-URI` header holding the # original raw request URI as the client has sent it. "raw-request-uri-header" : "off", # application.conf: 182 # see http://docs.oracle.com/javase/7/docs/technotes/guides/security/StandardNames.html#SSLContext for more examples # typical are either SSL or TLS "encryptionType" : "SSL", # reference.conf: 1485 # The period during which a service must respond to a `ChunkedRequestStart` message # with a `RegisterChunkHandler` message. During the registration period reading from # the network is suspended. It is still possible that some chunks have already been # received which will be buffered until the registration is received or the timeout is # triggered. If the timeout is triggered the connection is immediately aborted. "chunkhandler-registration-timeout" : "500 ms", # reference.conf: 1556 # The initial size if the buffer to render the response headers in. # Can be used for fine-tuning response rendering performance but probably # doesn't have to be fiddled with in most applications. "response-header-size-hint" : 512, # reference.conf: 1528 # Enables/disables an alternative response streaming mode that doesn't # use `Transfer-Encoding: chunked` but rather renders the individual # MessageChunks coming in from the application as parts of the original # response entity. # Enabling this mode causes all connections to be closed after a streaming # response has been finished since there is no other way to signal the # response end to the client. # Note that chunkless-streaming is implicitly enabled when streaming # responses to HTTP/1.0 clients (since they don't support # `Transfer-Encoding: chunked`) "chunkless-streaming" : "off", # reference.conf: 1495 # The "granularity" of timeout checking for both idle connections timeouts # as well as request timeouts, should rarely be needed to modify. # If set to `infinite` request and connection timeout checking is disabled. 
"reaping-cycle" : "250 ms" } [2018-03-28 15:03:08,689] INFO ka.event.slf4j.Slf4jLogger [] [] - Slf4jLogger started [2018-03-28 15:03:08,767] INFO Remoting [] [Remoting] - Starting remoting [2018-03-28 15:03:08,910] INFO Remoting [] [Remoting] - Remoting started; listening on addresses :[akka.tcp://JobServer@127.0.0.1:43026] [2018-03-28 15:03:08,912] INFO Remoting [] [Remoting] - Remoting now listens on addresses: [akka.tcp://JobServer@127.0.0.1:43026] [2018-03-28 15:03:08,933] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:43026] - Starting up... [2018-03-28 15:03:09,010] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:43026] - Registered cluster JMX MBean [akka:type=Cluster] [2018-03-28 15:03:09,010] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:43026] - Started up successfully [2018-03-28 15:03:09,022] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:43026] - No seed-nodes configured, manual cluster join required [2018-03-28 15:03:09,886] INFO ark.jobserver.io.JobSqlDAO [] [] - rootDir is /tmp/spark-job-server/sqldao/data [2018-03-28 15:03:09,984] INFO ternal.util.VersionPrinter [] [] - Flyway 3.2.1 by Boxfuse [2018-03-28 15:03:10,476] INFO dbsupport.DbSupportFactory [] [] - Database: jdbc:h2:/tmp/spark-job-server/sqldao/data/h2-db (H2 1.3) [2018-03-28 15:03:10,765] INFO nternal.command.DbValidate [] [] - Validated 1 migration (execution time 00:00.124s) [2018-03-28 15:03:10,785] INFO internal.command.DbMigrate [] [] - Current version of schema "PUBLIC": 0.7.0 [2018-03-28 15:03:10,786] INFO internal.command.DbMigrate [] [] - Schema "PUBLIC" is up to date. No migration necessary. [2018-03-28 15:03:10,831] INFO ark.jobserver.io.JobSqlDAO [] [] - Running dummy query to initialize dbcp. [2018-03-28 15:03:11,368] INFO k.jobserver.io.JobDAOActor [] [akka://JobServer/user/dao-manager] - Starting actor spark.jobserver.io.JobDAOActor [2018-03-28 15:03:11,369] INFO jobserver.DataManagerActor [] [akka://JobServer/user/data-manager] - Starting actor spark.jobserver.DataManagerActor [2018-03-28 15:03:11,371] INFO spark.jobserver.JarManager [] [akka://JobServer/user/jar-manager] - Starting actor spark.jobserver.JarManager [2018-03-28 15:03:11,373] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting actor spark.jobserver.AkkaClusterSupervisorActor [2018-03-28 15:03:11,374] INFO ark.jobserver.JobInfoActor [] [akka://JobServer/user/job-info] - Starting actor spark.jobserver.JobInfoActor [2018-03-28 15:03:11,406] INFO AkkaClusterSupervisorActor [] [] - AkkaClusterSupervisor initialized on akka.tcp://JobServer@127.0.0.1:43026 [2018-03-28 15:03:11,407] INFO k.jobserver.JobResultActor [] [akka://JobServer/user/context-supervisor/global-result-actor] - Starting actor spark.jobserver.JobResultActor [2018-03-28 15:03:11,445] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:43026] - Node [akka.tcp://JobServer@127.0.0.1:43026] is JOINING, roles [supervisor] [2018-03-28 15:03:11,736] INFO spark.jobserver.WebApi [] [] - No authentication. [2018-03-28 15:03:11,945] INFO spark.jobserver.WebApi [] [] - Starting browser web service... 
[2018-03-28 15:03:12,073] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:43026] - Leader is moving node [akka.tcp://JobServer@127.0.0.1:43026] to [Up]
[2018-03-28 15:03:12,720] INFO ay.can.server.HttpListener [] [akka://JobServer/user/IO-HTTP/listener-0] - Bound to /0.0.0.0:8090
[2018-03-28 15:05:29,135] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting context with actor name jobManager-ab-9ba2-012cd14be174
[2018-03-28 15:05:29,142] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Created working directory /var/log/spark-job-server/jobserver-knimeSparkContext9103052303242692117 for context knimeSparkContext
[2018-03-28 15:05:29,153] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting to execute sub process [./manager_start.sh, /var/log/spark-job-server/jobserver-knimeSparkContext9103052303242692117, akka.tcp://JobServer@127.0.0.1:43026]
[2018-03-28 15:05:31,429] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:43026] - Node [akka.tcp://JobServer@127.0.0.1:34076] is JOINING, roles [manager]
[2018-03-28 15:05:32,043] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:43026] - Leader is moving node [akka.tcp://JobServer@127.0.0.1:34076] to [Up]
[2018-03-28 15:05:32,148] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Received identify response, attempting to initialize context at akka.tcp://JobServer@127.0.0.1:34076/user/*
[2018-03-28 15:05:32,150] INFO k.jobserver.JobResultActor [] [akka://JobServer/user/context-supervisor/$a] - Starting actor spark.jobserver.JobResultActor
[2018-03-28 15:05:40,488] INFO AkkaClusterSupervisorActor [] [] - SparkContext knimeSparkContext joined
[2018-03-28 15:05:40,614] INFO spark.jobserver.JarManager [] [akka://JobServer/user/jar-manager] - Storing jar for app knimeJobs_013620532e91f948_c1d68d31852ba93bbe21b51883e769ded6594a8e_spark-1.6.cdh5_9, 398147 bytes
[2018-03-28 15:05:41,359] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [spark.jobserver.JobInfoActor$JobConfigStored$] from Actor[akka://JobServer/user/job-info#-558507439] to Actor[akka://JobServer/deadLetters] was not delivered. [1] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'.
[2018-03-28 16:55:02,994] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [spark.jobserver.JobInfoActor$JobConfigStored$] from Actor[akka://JobServer/user/job-info#-558507439] to Actor[akka://JobServer/deadLetters] was not delivered. [2] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'.
[2018-03-28 16:55:03,098] INFO jobserver.DataManagerActor [] [akka://JobServer/user/data-manager] - Storing data in file prefix mysql-connector-java-5.1.39-bin.jar, 989497 bytes
[2018-03-28 16:55:03,251] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [spark.jobserver.JobInfoActor$JobConfigStored$] from Actor[akka://JobServer/user/job-info#-558507439] to Actor[akka://JobServer/deadLetters] was not delivered. [3] dead letters encountered.
This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. [2018-03-28 16:55:26,814] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [spark.jobserver.JobInfoActor$JobConfigStored$] from Actor[akka://JobServer/user/job-info#-558507439] to Actor[akka://JobServer/deadLetters] was not delivered. [4] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. [2018-03-28 16:55:29,984] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [spark.jobserver.JobInfoActor$JobConfigStored$] from Actor[akka://JobServer/user/job-info#-558507439] to Actor[akka://JobServer/deadLetters] was not delivered. [5] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. [2018-03-28 16:55:37,425] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [spark.jobserver.JobInfoActor$JobConfigStored$] from Actor[akka://JobServer/user/job-info#-558507439] to Actor[akka://JobServer/deadLetters] was not delivered. [6] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. [2018-03-28 16:55:45,595] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [spark.jobserver.JobInfoActor$JobConfigStored$] from Actor[akka://JobServer/user/job-info#-558507439] to Actor[akka://JobServer/deadLetters] was not delivered. [7] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. [2018-03-28 16:56:02,772] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [spark.jobserver.JobInfoActor$JobConfigStored$] from Actor[akka://JobServer/user/job-info#-558507439] to Actor[akka://JobServer/deadLetters] was not delivered. [8] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. [2018-03-28 16:56:18,542] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [spark.jobserver.JobInfoActor$JobConfigStored$] from Actor[akka://JobServer/user/job-info#-558507439] to Actor[akka://JobServer/deadLetters] was not delivered. [9] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. [2018-03-28 16:56:48,751] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [spark.jobserver.JobInfoActor$JobConfigStored$] from Actor[akka://JobServer/user/job-info#-558507439] to Actor[akka://JobServer/deadLetters] was not delivered. [10] dead letters encountered, no more dead letters will be logged. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. 
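Editor's note: the repeated JobConfigStored$ dead-letter entries above are informational, and the log itself names the two Akka settings that control them ('akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'). A minimal sketch of how they could be silenced, assuming the job server picks these settings up from a config file such as /opt/spark-job-server/environment.conf (the placement shown here is an assumption, not something taken from this log):

  akka {
    # Stop logging dead letters entirely ("on", "off" or a maximum count are accepted)
    log-dead-letters = off
    # Also suppress dead-letter logging while the actor system shuts down
    log-dead-letters-during-shutdown = off
  }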
[2018-03-28 16:58:26,325] INFO jobserver.DataManagerActor [] [akka://JobServer/user/data-manager] - Storing data in file prefix mysql-connector-java-5.1.39-bin.jar, 989497 bytes [2018-03-28 16:59:20,576] INFO jobserver.DataManagerActor [] [akka://JobServer/user/data-manager] - Storing data in file prefix mysql-connector-java-5.1.39-bin.jar, 989497 bytes BUILD-VERSION: Commit 0abe49552a6fcdf6a5aca565d624cda255342c33 on branch 0.6.2-KNIME_cdh-5.13 built on 2017-12-06 18:06:16+01:00 [2018-03-29 11:30:01,132] INFO spark.jobserver.JobServer$ [] [] - Starting JobServer with config { # system properties "ui" : { # system properties "enabled" : "true", # system properties "killEnabled" : "true" }, # system properties "app" : { # system properties "name" : "spark.jobserver.JobServer" }, # application.conf: 5 # spark web UI port "webUrlPort" : 8080, # system properties "submit" : { # system properties "deployMode" : "client" }, # system properties "serializer" : "org.apache.spark.serializer.KryoSerializer", # system properties "lineage" : { # system properties "enabled" : "false", # system properties "log" : { # system properties "dir" : "/var/log/spark/lineage" } }, # system properties "executor" : { # system properties "extraLibraryPath" : "/usr/lib/hadoop/lib/native", # system properties "uri" : "", # system properties "extraJavaOptions" : "-Dlog4j.configuration=file:/opt/spark-job-server/log4j-server.properties -DLOG_DIR=/var/log/spark-job-server" }, # merge of /opt/spark-job-server/environment.conf: 25,application.conf: 7 # ######################################################################### # Job server settings # ######################################################################### "jobserver" : { # /opt/spark-job-server/environment.conf: 27 # TCP port that the job server listens on for HTTP requests "port" : 8090, # merge of /opt/spark-job-server/environment.conf: 37,application.conf: 40 "sqldao" : { # /opt/spark-job-server/environment.conf: 38 "rootdir" : "/tmp/spark-job-server/sqldao/data", # application.conf: 42 # Slick database driver, full classpath "slick-driver" : "scala.slick.driver.H2Driver", # /opt/spark-job-server/environment.conf: 41 "dbcp" : { # /opt/spark-job-server/environment.conf: 45 "maxidle" : 10, # /opt/spark-job-server/environment.conf: 43 "maxactive" : 20, # /opt/spark-job-server/environment.conf: 42 "connectonstart" : true, # /opt/spark-job-server/environment.conf: 44 "minidle" : 1, # /opt/spark-job-server/environment.conf: 46 "initialsize" : 10 }, # application.conf: 45 # JDBC driver, full classpath "jdbc-driver" : "org.h2.Driver", # merge of /opt/spark-job-server/environment.conf: 39,application.conf: 52 # Full JDBC URL / init string, along with username and password. Sorry, needs to match above. # Substitutions may be used to launch job-server, but leave it out here in the default or tests won't pass "jdbc" : { # application.conf: 55 "password" : "", # application.conf: 54 "user" : "", # /opt/spark-job-server/environment.conf: 39 "url" : "jdbc:h2:/tmp/spark-job-server/sqldao/data/h2-db;AUTO_SERVER=TRUE;AUTO_RECONNECT=TRUE" } }, # application.conf: 87 # spark broadcst factory in yarn deployment # Versions prior to 1.1.0, spark default broadcast factory is org.apache.spark.broadcast.HttpBroadcastFactory. # Can't start multiple sparkContexts in the same JVM with HttpBroadcastFactory. 
"yarn-broadcast-factory" : "org.apache.spark.broadcast.TorrentBroadcastFactory", # /opt/spark-job-server/environment.conf: 52 # Storage directory for files that are uploaded to the server # via POST/data commands "datadao" : { # /opt/spark-job-server/environment.conf: 53 "rootdir" : "/tmp/spark-job-server/upload" }, # application.conf: 9 "bind-address" : "0.0.0.0", # /opt/spark-job-server/environment.conf: 65 # Timeouts for Spark context creation in seconds. In "yarn-client" mode, use # the first option, in all other modes, use the second one. If context # creation takes longer than the timeout, the jobserver closes the context. # Especially in yarn-client mode, context creation includes container # allocation, which can take a while. "yarn-context-creation-timeout" : "60 s", # /opt/spark-job-server/environment.conf: 66 "context-creation-timeout" : "60 s", # application.conf: 69 # The ask pattern timeout for Api "short-timeout" : "3 s", # /opt/spark-job-server/environment.conf: 58 # Number of jobs that can be run simultaneously per context. If not set, # defaults to number of cores on machine where Spark job server is running. "max-jobs-per-context" : 100, # /opt/spark-job-server/environment.conf: 36 # Class to use to persist data such as jars, applications, jobs, etc. # spark.jobserver.io.JobFileDAO uses the file system for persistence # spark.jobserver.io.JobSqlDAO uses an SQL database for persistence # "jobdao" : "spark.jobserver.io.JobSqlDAO", # /opt/spark-job-server/environment.conf: 30 # Directory where the job server stores uploaded jar files "jar-store-rootdir" : "/tmp/spark-job-server/jars", # application.conf: 24 "filedao" : { # application.conf: 25 "rootdir" : "/tmp/spark-jobserver/filedao/data" }, # application.conf: 15 # Number of job results to keep per JobResultActor/context "job-result-cache-size" : 5000, # /opt/spark-job-server/environment.conf: 71 # If true, a separate JVM is forked for each Spark context. # KERBEROS NOTE: In a Kerberos-enabled environment, you should set this to true # (as well as shiro authentication, see later in this file). "context-per-jvm" : true, # application.conf: 75 # Time out for job server to wait while creating named objects "named-object-creation-timeout" : "60 s" }, # merge of /opt/spark-job-server/environment.conf: 90,application.conf: 103 # Default settings for Spark contexts. These settings can be overridden on a # per-context basis. Please consult the Spark documentation for more details # on available settings. # Default settings for ad hoc as well as manually created contexts # You can add any Spark config params here, for example, spark.mesos.coarse = true "context-settings" : { # application.conf: 111 "streaming" : { # application.conf: 116 # if true, stops gracefully by waiting for the processing of all received data to be completed "stopGracefully" : true, # application.conf: 120 # if true, stops the SparkContext with the StreamingContext. The underlying SparkContext will be # stopped regardless of whether the StreamingContext has been started. 
"stopSparkContext" : true, # application.conf: 113 # Default batch interval for Spark Streaming contexts in milliseconds "batch_interval" : 1000 }, # application.conf: 127 # Timeout for SupervisorActor to wait for forked (separate JVM) contexts to initialize "context-init-timeout" : "60s", # application.conf: 109 # A zero-arg class implementing spark.jobserver.context.SparkContextFactory # Determines the type of jobs that can run in a SparkContext "context-factory" : "spark.jobserver.context.DefaultSparkContextFactory", # application.conf: 129 "passthrough" : { # application.conf: 130 "spark" : { # application.conf: 130 "driver" : { # application.conf: 130 "allowMultipleContexts" : true } } }, # /opt/spark-job-server/environment.conf: 106 # Required setting, that sets "spark.executor.memory" in the SparkConf. Sets # the amount of memory to use per Spark executor process, in the same format # as JVM memory strings (e.g. 512m, 2g). # # Note: In "yarn-client" mode, Spark will request this amount of memory per YARN # container, plus some additional overhead. "memory-per-node" : "1G", # /opt/spark-job-server/environment.conf: 98 # Required setting, that sets "spark.cores.max" in the SparkConf. This sets # the maximum amount of CPU cores to request for the Spark on the cluster # (not from each machine). # # IMPORTANT: Note that although required by job-server, this setting only # has an effect in Standalone and Mesos clusters. "num-cpu-cores" : 2 }, # /opt/spark-job-server/environment.conf: 82 # Sets the "spark.master" property in the SparkConf. KNIME recommends # "yarn-client" for production use and "local[4]" for debugging purposes. # master = "local[4]" # Run Spark locally with 4 worker threads "master" : "yarn-client", # system properties "dynamicAllocation" : { # system properties "enabled" : "true", # system properties "minExecutors" : "0", # system properties "executorIdleTimeout" : "60", # system properties "schedulerBacklogTimeout" : "1" }, # system properties "authenticate" : { # system properties "enableSaslEncryption" : "false" }, # system properties "sql" : { # system properties "queryExecutionListeners" : "com.cloudera.spark.lineage.ClouderaNavigatorListener" }, # system properties "shuffle" : { # system properties "encryption" : { # system properties "enabled" : "false" }, # system properties "service" : { # system properties "port" : "7337", # system properties "enabled" : "true" } }, # system properties "eventLog" : { # system properties "enabled" : "true", # system properties "dir" : "hdfs://ip-172-31-34-104.us-west-2.compute.internal:8020/user/spark/applicationHistory" }, # system properties "driver" : { # system properties "extraLibraryPath" : "/usr/lib/hadoop/lib/native", # system properties "extraJavaOptions" : "-XX:+UseConcMarkSweepGC\n -verbose:gc -XX:+PrintGCTimeStamps -Xloggc:/opt/spark-job-server/gc.out\n -XX:MaxPermSize=512m\n -XX:+CMSClassUnloadingEnabled -XX:MaxDirectMemorySize=512M -XX:+HeapDumpOnOutOfMemoryError -Djava.net.preferIPv4Stack=true -Dlog4j.configuration=file:/opt/spark-job-server/log4j-server.properties -DLOG_DIR=/var/log/spark-job-server -Dspark.executor.uri= ", # system properties "memory" : "2G" }, # application.conf: 93 # predefined Spark contexts # Below is an example, but do not uncomment it. Everything defined here is carried over to # deploy-time configs, so they will be created in all environments. 
:( "contexts" : {}, # system properties "extraListeners" : "com.cloudera.spark.lineage.ClouderaNavigatorListener", # system properties "yarn" : { # system properties "am" : { # system properties "extraLibraryPath" : "/usr/lib/hadoop/lib/native" }, # system properties "jar" : "local:/usr/lib/spark/lib/spark-assembly.jar", # system properties "historyServer" : { # system properties "allowTracking" : "true", # system properties "address" : "http://ip-172-31-34-104.us-west-2.compute.internal:18088" } }, # system properties "jars" : "file:/opt/spark-job-server/spark-job-server.jar" } [2018-03-29 11:30:01,141] INFO spark.jobserver.JobServer$ [] [] - Spray config: { # reference.conf: 1478 # After a `Timedout` message has been sent to the timeout handler and the # request still hasn't been completed after the time period set here # the server will complete the request itself with an error response. # Set to `infinite` to disable timeout timeouts. "timeout-timeout" : "2 s", # application.conf: 187 # ssl engine provider protocols "enabledProtocols" : [ # application.conf: 187 "SSLv3", # application.conf: 187 "TLSv1" ], # application.conf: 190 "pipelining-limit" : 2, # reference.conf: 1575 # The time period within which a connection handler must have been # registered after the bind handler has received a `Connected` event. # Set to `infinite` to disable. "registration-timeout" : "1s", # /opt/spark-job-server/environment.conf: 154 # parsing { # max-content-length = 200m # } "request-timeout" : "60 s", # reference.conf: 1600 "back-pressure" : { # reference.conf: 1604 # The reciprocal rate of requested Acks per NoAcks. E.g. the default value # '10' means that every 10th write request is acknowledged. This affects the # number of writes each connection has to buffer even in absence of back-pressure. "noack-rate" : 10, # reference.conf: 1609 # The lower limit the write queue size has to shrink to before reads are resumed. # Use 'infinite' to disable the low-watermark so that reading is resumed instantly # after the next successful write. "reading-low-watermark" : "infinite" }, # reference.conf: 1570 # The time period within which the TCP unbinding process must be completed. # Set to `infinite` to disable. "unbind-timeout" : "1s", # reference.conf: 1500 # Enables/disables support for statistics collection and querying. # Even though stats keeping overhead is small, # for maximum performance switch off when not needed. "stats-support" : "on", # reference.conf: 1490 # The path of the actor to send `spray.http.Timedout` messages to. # If empty all `Timedout` messages will go to the "regular" request # handling actor. "timeout-handler" : "", # /opt/spark-job-server/environment.conf: 155 "idle-timeout" : "120 s", # /opt/spark-job-server/environment.conf: 156 "request-chunk-aggregation-limit" : "200m", # reference.conf: 1449 # Enables/disables SSL encryption. # If enabled the server uses the implicit `ServerSSLEngineProvider` member # of the `Bind` command to create `SSLEngine` instances for the underlying # IO connection. "ssl-encryption" : "off", # application.conf: 192 # for maximum performance (prevents StopReading / ResumeReading messages to the IOBridge) # Needed for HTTP/1.0 requests with missing Host headers "default-host-header" : "spray.io:8765", # reference.conf: 1504 # Enables/disables the addition of a `Remote-Address` header # holding the clients (remote) IP address. 
"remote-address-header" : "off", # reference.conf: 1584 # The time after which a connection is aborted (RST) after a parsing error # occurred. The timeout prevents a connection which is already known to be # erroneous from receiving evermore data even if all of the data will be ignored. # However, in case of a connection abortion the client usually doesn't properly # receive the error response. This timeout is a trade-off which allows the client # some time to finish its request and receive a proper error response before the # connection is forcibly closed to free resources. "parsing-error-abort-timeout" : "2s", # merge of application.conf: 195,reference.conf: 1750 # The (default) configuration of the HTTP message parser for the server and # the client. # IMPORTANT: These settings (i.e. children of `spray.can.parsing`) can't be directly # overridden in `application.conf` to change the parser settings for client and server # altogether (see https://github.com/spray/spray/issues/346). Instead, override the # concrete settings beneath `spray.can.server.parsing` and `spray.can.client.parsing` # where these settings are copied to. "parsing" : { # reference.conf: 1758 "max-chunk-ext-length" : 256, # reference.conf: 1759 "max-chunk-size" : "1m", # application.conf: 195 # Increase this in order to upload bigger job jars "max-content-length" : "30m", # reference.conf: 1756 "max-header-count" : 64, # reference.conf: 1773 # Sets the strictness mode for parsing request target URIs. # The following values are defined: # # `strict`: RFC3986-compliant URIs are required, # a 400 response is triggered on violations # # `relaxed`: all visible 7-Bit ASCII chars are allowed # # `relaxed-with-raw-query`: like `relaxed` but additionally # the URI query is not parsed, but delivered as one raw string # as the `key` value of a single Query structure element. # "uri-parsing-mode" : "strict", # reference.conf: 1753 "max-response-reason-length" : 64, # reference.conf: 1783 # Enables/disables the logging of warning messages in case an incoming # message (request or response) contains an HTTP header which cannot be # parsed into its high-level model class due to incompatible syntax. # Note that, independently of this settings, spray will accept messages # with such headers as long as the message as a whole would still be legal # under the HTTP specification even without this header. # If a header cannot be parsed into a high-level model instance it will be # provided as a `RawHeader`. "illegal-header-warnings" : "on", # reference.conf: 1755 "max-header-value-length" : "8k", # reference.conf: 1787 # limits for the number of different values per header type that the # header cache will hold "header-cache" : { # reference.conf: 1792 "If-Modified-Since" : 0, # reference.conf: 1794 "If-Range" : 0, # reference.conf: 1788 "default" : 12, # reference.conf: 1790 "Date" : 0, # reference.conf: 1789 "Content-MD5" : 0, # reference.conf: 1796 "User-Agent" : 32, # reference.conf: 1795 "If-Unmodified-Since" : 0, # reference.conf: 1793 "If-None-Match" : 0, # reference.conf: 1791 "If-Match" : 0 }, # reference.conf: 1752 # The limits for the various parts of the HTTP message parser. "max-uri-length" : "2k", # reference.conf: 1754 "max-header-name-length" : 64, # reference.conf: 1807 # Enables/disables inclusion of an SSL-Session-Info header in parsed # messages over SSL transports (i.e., HttpRequest on server side and # HttpResponse on client side). 
"ssl-session-info-header" : "off", # reference.conf: 1802 # Sets the size starting from which incoming http-messages will be delivered # in chunks regardless of whether chunking is actually used on the wire. # Set to infinite to disable auto chunking. "incoming-auto-chunking-threshold-size" : "infinite" }, # reference.conf: 1566 # The time period within which the TCP binding process must be completed. # Set to `infinite` to disable. "bind-timeout" : "1s", # reference.conf: 1536 # Enables/disables the returning of more detailed error messages to # the client in the error response. # Should be disabled for browser-facing APIs due to the risk of XSS attacks # and (probably) enabled for internal or non-browser APIs. # Note that spray will always produce log messages containing the full # error details. "verbose-error-messages" : "off", # reference.conf: 1516 # Enables/disables automatic handling of HEAD requests. # If this setting is enabled the server dispatches HEAD requests as GET # requests to the application and automatically strips off all message # bodies from outgoing responses. # Note that, even when this setting is off the server will never send # out message bodies on responses to HEAD requests. "transparent-head-requests" : "on", # application.conf: 183 "keystoreType" : "JKS", # reference.conf: 1541 # Enables/disables the logging of the full (potentially multiple line) # error message to the server logs. # If disabled only a single line will be logged. "verbose-error-logging" : "off", # reference.conf: 1598 # Enables/disables automatic back-pressure handling by write buffering and # receive throttling "automatic-back-pressure-handling" : "on", # reference.conf: 1443-2058 # Always contains the deployed version of spray. # Referenced, for example, from the `spray.can.server.server-header` setting. "server-header" : "spray-can/1.2.3", # application.conf: 185 # key manager factory provider "provider" : "SunX509", # reference.conf: 1613 # Enables more verbose DEBUG logging for debugging SSL related issues. "ssl-tracing" : "off", # reference.conf: 1562 # For HTTPS connections this setting specified the maximum number of # bytes that are encrypted in one go. Large responses are broken down in # chunks of this size so as to already begin sending before the response has # been encrypted entirely. "max-encryption-chunk-size" : "1m", # reference.conf: 1508 # Enables/disables the addition of a `Raw-Request-URI` header holding the # original raw request URI as the client has sent it. "raw-request-uri-header" : "off", # application.conf: 182 # see http://docs.oracle.com/javase/7/docs/technotes/guides/security/StandardNames.html#SSLContext for more examples # typical are either SSL or TLS "encryptionType" : "SSL", # reference.conf: 1485 # The period during which a service must respond to a `ChunkedRequestStart` message # with a `RegisterChunkHandler` message. During the registration period reading from # the network is suspended. It is still possible that some chunks have already been # received which will be buffered until the registration is received or the timeout is # triggered. If the timeout is triggered the connection is immediately aborted. "chunkhandler-registration-timeout" : "500 ms", # reference.conf: 1556 # The initial size if the buffer to render the response headers in. # Can be used for fine-tuning response rendering performance but probably # doesn't have to be fiddled with in most applications. 
"response-header-size-hint" : 512, # reference.conf: 1528 # Enables/disables an alternative response streaming mode that doesn't # use `Transfer-Encoding: chunked` but rather renders the individual # MessageChunks coming in from the application as parts of the original # response entity. # Enabling this mode causes all connections to be closed after a streaming # response has been finished since there is no other way to signal the # response end to the client. # Note that chunkless-streaming is implicitly enabled when streaming # responses to HTTP/1.0 clients (since they don't support # `Transfer-Encoding: chunked`) "chunkless-streaming" : "off", # reference.conf: 1495 # The "granularity" of timeout checking for both idle connections timeouts # as well as request timeouts, should rarely be needed to modify. # If set to `infinite` request and connection timeout checking is disabled. "reaping-cycle" : "250 ms" } [2018-03-29 11:30:01,853] INFO ka.event.slf4j.Slf4jLogger [] [] - Slf4jLogger started [2018-03-29 11:30:02,173] INFO Remoting [] [Remoting] - Starting remoting [2018-03-29 11:30:02,555] INFO Remoting [] [Remoting] - Remoting started; listening on addresses :[akka.tcp://JobServer@127.0.0.1:46151] [2018-03-29 11:30:02,557] INFO Remoting [] [Remoting] - Remoting now listens on addresses: [akka.tcp://JobServer@127.0.0.1:46151] [2018-03-29 11:30:02,598] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:46151] - Starting up... [2018-03-29 11:30:02,831] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:46151] - Registered cluster JMX MBean [akka:type=Cluster] [2018-03-29 11:30:02,832] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:46151] - Started up successfully [2018-03-29 11:30:02,844] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:46151] - No seed-nodes configured, manual cluster join required [2018-03-29 11:30:04,472] INFO ark.jobserver.io.JobSqlDAO [] [] - rootDir is /tmp/spark-job-server/sqldao/data [2018-03-29 11:30:04,619] INFO ternal.util.VersionPrinter [] [] - Flyway 3.2.1 by Boxfuse [2018-03-29 11:30:05,459] INFO dbsupport.DbSupportFactory [] [] - Database: jdbc:h2:/tmp/spark-job-server/sqldao/data/h2-db (H2 1.3) [2018-03-29 11:30:06,781] INFO nternal.command.DbValidate [] [] - Validated 1 migration (execution time 00:01.043s) [2018-03-29 11:30:06,811] INFO atatable.MetaDataTableImpl [] [] - Creating Metadata table: "PUBLIC"."schema_version" [2018-03-29 11:30:06,848] INFO internal.command.DbMigrate [] [] - Current version of schema "PUBLIC": << Empty Schema >> [2018-03-29 11:30:06,848] INFO internal.command.DbMigrate [] [] - Migrating schema "PUBLIC" to version 0.7.0 - init tables [2018-03-29 11:30:06,906] INFO internal.command.DbMigrate [] [] - Successfully applied 1 migration to schema "PUBLIC" (execution time 00:00.096s). [2018-03-29 11:30:06,988] INFO ark.jobserver.io.JobSqlDAO [] [] - Running dummy query to initialize dbcp. 
[2018-03-29 11:30:07,794] INFO k.jobserver.io.JobDAOActor [] [akka://JobServer/user/dao-manager] - Starting actor spark.jobserver.io.JobDAOActor [2018-03-29 11:30:07,813] INFO jobserver.DataManagerActor [] [akka://JobServer/user/data-manager] - Starting actor spark.jobserver.DataManagerActor [2018-03-29 11:30:07,825] INFO spark.jobserver.JarManager [] [akka://JobServer/user/jar-manager] - Starting actor spark.jobserver.JarManager [2018-03-29 11:30:07,826] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting actor spark.jobserver.AkkaClusterSupervisorActor [2018-03-29 11:30:07,835] INFO ark.jobserver.JobInfoActor [] [akka://JobServer/user/job-info] - Starting actor spark.jobserver.JobInfoActor [2018-03-29 11:30:07,846] INFO AkkaClusterSupervisorActor [] [] - AkkaClusterSupervisor initialized on akka.tcp://JobServer@127.0.0.1:46151 [2018-03-29 11:30:07,848] INFO k.jobserver.JobResultActor [] [akka://JobServer/user/context-supervisor/global-result-actor] - Starting actor spark.jobserver.JobResultActor [2018-03-29 11:30:08,020] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:46151] - Node [akka.tcp://JobServer@127.0.0.1:46151] is JOINING, roles [supervisor] [2018-03-29 11:30:08,565] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:46151] - Leader is moving node [akka.tcp://JobServer@127.0.0.1:46151] to [Up] [2018-03-29 11:30:08,770] INFO spark.jobserver.WebApi [] [] - No authentication. [2018-03-29 11:30:09,307] INFO spark.jobserver.WebApi [] [] - Starting browser web service... [2018-03-29 11:30:11,685] INFO ay.can.server.HttpListener [] [akka://JobServer/user/IO-HTTP/listener-0] - Bound to /0.0.0.0:8090 [2018-03-29 11:30:11,694] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [akka.io.Tcp$Bound] from Actor[akka://JobServer/user/IO-HTTP/listener-0#818205083] to Actor[akka://JobServer/deadLetters] was not delivered. [1] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. 
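Editor's note: unlike the 2018-03-28 startup further up, which found schema "PUBLIC" already at version 0.7.0, this run sees << Empty Schema >> and re-creates the metadata tables. That is consistent with the H2 files having disappeared between runs, since sqldao.rootdir and the JDBC URL both point under /tmp, as do the jar and upload directories. A hedged sketch of keeping this state somewhere persistent instead (key names are taken from the config dump above; the spark.jobserver nesting and the /var/lib path are assumptions):

  spark.jobserver {
    sqldao {
      rootdir = /var/lib/spark-job-server/sqldao/data
      jdbc.url = "jdbc:h2:/var/lib/spark-job-server/sqldao/data/h2-db;AUTO_SERVER=TRUE;AUTO_RECONNECT=TRUE"
    }
    # Uploaded jars and POST/data files, currently also under /tmp
    jar-store-rootdir = /var/lib/spark-job-server/jars
    datadao.rootdir = /var/lib/spark-job-server/upload
  }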
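Editor's note: the knimeSparkContext started below inherits the defaults from the context-settings block in the config dump above: master = yarn-client, memory-per-node = 1G and num-cpu-cores = 2. A minimal sketch of raising those defaults, assuming the usual spark { ... } nesting in environment.conf (the values are illustrative only):

  spark {
    master = "yarn-client"
    context-settings {
      # Becomes spark.executor.memory; per the comments above, YARN adds some overhead on top
      memory-per-node = 2G
      # Becomes spark.cores.max; per the comments above, only effective on Standalone and Mesos clusters
      num-cpu-cores = 4
    }
  }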
[2018-03-29 11:40:58,213] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting context with actor name jobManager-b4-b815-a2572d2507dc
[2018-03-29 11:40:58,220] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Created working directory /var/log/spark-job-server/jobserver-knimeSparkContext1383524912272416053 for context knimeSparkContext
[2018-03-29 11:40:58,231] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting to execute sub process [./manager_start.sh, /var/log/spark-job-server/jobserver-knimeSparkContext1383524912272416053, akka.tcp://JobServer@127.0.0.1:46151]
[2018-03-29 11:41:00,494] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:46151] - Node [akka.tcp://JobServer@127.0.0.1:36310] is JOINING, roles [manager]
[2018-03-29 11:41:00,552] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:46151] - Leader is moving node [akka.tcp://JobServer@127.0.0.1:36310] to [Up]
[2018-03-29 11:41:00,793] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Received identify response, attempting to initialize context at akka.tcp://JobServer@127.0.0.1:36310/user/*
[2018-03-29 11:41:00,801] INFO k.jobserver.JobResultActor [] [akka://JobServer/user/context-supervisor/$a] - Starting actor spark.jobserver.JobResultActor
[2018-03-29 11:41:12,273] INFO AkkaClusterSupervisorActor [] [] - SparkContext knimeSparkContext joined
[2018-03-29 11:41:12,406] INFO spark.jobserver.JarManager [] [akka://JobServer/user/jar-manager] - Storing jar for app knimeJobs_013620532e91f948_65dffd0366301f75b3c2dabe274a502a4f448c88_spark-1.6.cdh5_9, 398149 bytes
[2018-03-29 11:41:13,135] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [spark.jobserver.JobInfoActor$JobConfigStored$] from Actor[akka://JobServer/user/job-info#-1168719605] to Actor[akka://JobServer/deadLetters] was not delivered. [2] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'.
[2018-03-29 11:41:21,237] INFO jobserver.DataManagerActor [] [akka://JobServer/user/data-manager] - Storing data in file prefix mysql-connector-java-5.1.39-bin.jar, 989497 bytes
[2018-03-29 11:41:21,480] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [spark.jobserver.JobInfoActor$JobConfigStored$] from Actor[akka://JobServer/user/job-info#-1168719605] to Actor[akka://JobServer/deadLetters] was not delivered. [3] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'.
[2018-03-29 11:42:00,626] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [spark.jobserver.JobInfoActor$JobConfigStored$] from Actor[akka://JobServer/user/job-info#-1168719605] to Actor[akka://JobServer/deadLetters] was not delivered. [4] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'.
[2018-03-29 11:42:16,317] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [spark.jobserver.JobInfoActor$JobConfigStored$] from Actor[akka://JobServer/user/job-info#-1168719605] to Actor[akka://JobServer/deadLetters] was not delivered. [5] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'.
[2018-03-29 11:42:37,206] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [spark.jobserver.JobInfoActor$JobConfigStored$] from Actor[akka://JobServer/user/job-info#-1168719605] to Actor[akka://JobServer/deadLetters] was not delivered. [6] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'.
[2018-03-29 11:43:02,264] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [spark.jobserver.JobInfoActor$JobConfigStored$] from Actor[akka://JobServer/user/job-info#-1168719605] to Actor[akka://JobServer/deadLetters] was not delivered. [7] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'.
[2018-03-29 11:46:50,491] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Actor terminated: jobManager-b4-b815-a2572d2507dc
[2018-03-29 11:46:50,493] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:46151] - Marking node [akka.tcp://JobServer@127.0.0.1:36310] as [Down]
[2018-03-29 11:46:50,512] ERROR akka.remote.EndpointWriter [] [akka://JobServer/system/endpointManager/reliableEndpointWriter-akka.tcp%3A%2F%2FJobServer%40127.0.0.1%3A36310-0/endpointWriter] - AssociationError [akka.tcp://JobServer@127.0.0.1:46151] <- [akka.tcp://JobServer@127.0.0.1:36310]: Error [Shut down address: akka.tcp://JobServer@127.0.0.1:36310] [
akka.remote.ShutDownAssociation: Shut down address: akka.tcp://JobServer@127.0.0.1:36310
Caused by: akka.remote.transport.Transport$InvalidAssociationException: The remote system terminated the association because it is shutting down.
]
[2018-03-29 11:46:50,518] INFO akka.actor.LocalActorRef [] [akka://JobServer/system/transports/akkaprotocolmanager.tcp0/akkaProtocol-tcp%3A%2F%2FJobServer%40127.0.0.1%3A54148-1] - Message [akka.remote.transport.AssociationHandle$Disassociated] from Actor[akka://JobServer/deadLetters] to Actor[akka://JobServer/system/transports/akkaprotocolmanager.tcp0/akkaProtocol-tcp%3A%2F%2FJobServer%40127.0.0.1%3A54148-1#1949826617] was not delivered. [8] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'.
[2018-03-29 11:46:50,519] INFO akka.actor.LocalActorRef [] [akka://JobServer/system/transports/akkaprotocolmanager.tcp0/akkaProtocol-tcp%3A%2F%2FJobServer%40127.0.0.1%3A54148-1] - Message [akka.remote.transport.ActorTransportAdapter$DisassociateUnderlying] from Actor[akka://JobServer/deadLetters] to Actor[akka://JobServer/system/transports/akkaprotocolmanager.tcp0/akkaProtocol-tcp%3A%2F%2FJobServer%40127.0.0.1%3A54148-1#1949826617] was not delivered. [9] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'.
[2018-03-29 11:46:50,552] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:46151] - Leader is removing unreachable node [akka.tcp://JobServer@127.0.0.1:36310]
[2018-03-29 11:46:50,553] INFO Remoting [] [Remoting] - Address [akka.tcp://JobServer@127.0.0.1:36310] is now quarantined, all messages to this address will be delivered to dead letters.
[2018-03-29 11:46:51,052] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [akka.cluster.ClusterHeartbeatReceiver$EndHeartbeat] from Actor[akka://JobServer/system/cluster/core/daemon/heartbeatSender#-1780282699] to Actor[akka://JobServer/deadLetters] was not delivered. [10] dead letters encountered, no more dead letters will be logged. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'.
[2018-03-29 11:53:54,678] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting context with actor name jobManager-24-964e-42650a4035c5
[2018-03-29 11:53:54,678] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Created working directory /var/log/spark-job-server/jobserver-knimeSparkContext1721604207712731091 for context knimeSparkContext
[2018-03-29 11:53:54,679] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting to execute sub process [./manager_start.sh, /var/log/spark-job-server/jobserver-knimeSparkContext1721604207712731091, akka.tcp://JobServer@127.0.0.1:46151]
[2018-03-29 11:53:56,759] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:46151] - Node [akka.tcp://JobServer@127.0.0.1:43581] is JOINING, roles [manager]
[2018-03-29 11:53:57,552] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:46151] - Leader is moving node [akka.tcp://JobServer@127.0.0.1:43581] to [Up]
[2018-03-29 11:53:57,608] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Received identify response, attempting to initialize context at akka.tcp://JobServer@127.0.0.1:43581/user/*
[2018-03-29 11:53:57,609] INFO k.jobserver.JobResultActor [] [akka://JobServer/user/context-supervisor/$b] - Starting actor spark.jobserver.JobResultActor
[2018-03-29 11:54:07,744] INFO AkkaClusterSupervisorActor [] [] - SparkContext knimeSparkContext joined
[2018-03-29 11:54:07,799] INFO spark.jobserver.JarManager [] [akka://JobServer/user/jar-manager] - Storing jar for app knimeJobs_013620532e91f948_c1d68d31852ba93bbe21b51883e769ded6594a8e_spark-1.6.cdh5_9, 398148 bytes
[2018-03-29 13:16:56,690] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Actor terminated: jobManager-24-964e-42650a4035c5
[2018-03-29 13:16:56,696] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:46151] - Marking node [akka.tcp://JobServer@127.0.0.1:43581] as [Down]
[2018-03-29 13:16:56,704] ERROR akka.remote.EndpointWriter [] [akka://JobServer/system/endpointManager/reliableEndpointWriter-akka.tcp%3A%2F%2FJobServer%40127.0.0.1%3A43581-1/endpointWriter] - AssociationError [akka.tcp://JobServer@127.0.0.1:46151] <- [akka.tcp://JobServer@127.0.0.1:43581]: Error [Shut down address: akka.tcp://JobServer@127.0.0.1:43581] [
akka.remote.ShutDownAssociation: Shut down address: akka.tcp://JobServer@127.0.0.1:43581
Caused by:
akka.remote.transport.Transport$InvalidAssociationException: The remote system terminated the association because it is shutting down. ] [2018-03-29 13:16:57,542] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:46151] - Leader is removing unreachable node [akka.tcp://JobServer@127.0.0.1:43581] [2018-03-29 13:16:57,542] INFO Remoting [] [Remoting] - Address [akka.tcp://JobServer@127.0.0.1:43581] is now quarantined, all messages to this address will be delivered to dead letters. BUILD-VERSION: Commit 0abe49552a6fcdf6a5aca565d624cda255342c33 on branch 0.6.2-KNIME_cdh-5.13 built on 2017-12-06 18:06:16+01:00 [2018-03-29 16:35:57,195] INFO spark.jobserver.JobServer$ [] [] - Starting JobServer with config { # system properties "ui" : { # system properties "enabled" : "true", # system properties "killEnabled" : "true" }, # system properties "app" : { # system properties "name" : "spark.jobserver.JobServer" }, # application.conf: 5 # spark web UI port "webUrlPort" : 8080, # system properties "submit" : { # system properties "deployMode" : "client" }, # system properties "serializer" : "org.apache.spark.serializer.KryoSerializer", # system properties "lineage" : { # system properties "enabled" : "false", # system properties "log" : { # system properties "dir" : "/var/log/spark/lineage" } }, # system properties "executor" : { # system properties "extraLibraryPath" : "/usr/lib/hadoop/lib/native", # system properties "uri" : "", # system properties "extraJavaOptions" : "-Dlog4j.configuration=file:/opt/spark-job-server/log4j-server.properties -DLOG_DIR=/var/log/spark-job-server" }, # merge of /opt/spark-job-server/environment.conf: 25,application.conf: 7 # ######################################################################### # Job server settings # ######################################################################### "jobserver" : { # /opt/spark-job-server/environment.conf: 27 # TCP port that the job server listens on for HTTP requests "port" : 8090, # merge of /opt/spark-job-server/environment.conf: 37,application.conf: 40 "sqldao" : { # /opt/spark-job-server/environment.conf: 38 "rootdir" : "/tmp/spark-job-server/sqldao/data", # application.conf: 42 # Slick database driver, full classpath "slick-driver" : "scala.slick.driver.H2Driver", # /opt/spark-job-server/environment.conf: 41 "dbcp" : { # /opt/spark-job-server/environment.conf: 45 "maxidle" : 10, # /opt/spark-job-server/environment.conf: 43 "maxactive" : 20, # /opt/spark-job-server/environment.conf: 42 "connectonstart" : true, # /opt/spark-job-server/environment.conf: 44 "minidle" : 1, # /opt/spark-job-server/environment.conf: 46 "initialsize" : 10 }, # application.conf: 45 # JDBC driver, full classpath "jdbc-driver" : "org.h2.Driver", # merge of /opt/spark-job-server/environment.conf: 39,application.conf: 52 # Full JDBC URL / init string, along with username and password. Sorry, needs to match above. # Substitutions may be used to launch job-server, but leave it out here in the default or tests won't pass "jdbc" : { # application.conf: 55 "password" : "", # application.conf: 54 "user" : "", # /opt/spark-job-server/environment.conf: 39 "url" : "jdbc:h2:/tmp/spark-job-server/sqldao/data/h2-db;AUTO_SERVER=TRUE;AUTO_RECONNECT=TRUE" } }, # application.conf: 87 # spark broadcst factory in yarn deployment # Versions prior to 1.1.0, spark default broadcast factory is org.apache.spark.broadcast.HttpBroadcastFactory. 
# Can't start multiple sparkContexts in the same JVM with HttpBroadcastFactory. "yarn-broadcast-factory" : "org.apache.spark.broadcast.TorrentBroadcastFactory", # /opt/spark-job-server/environment.conf: 52 # Storage directory for files that are uploaded to the server # via POST/data commands "datadao" : { # /opt/spark-job-server/environment.conf: 53 "rootdir" : "/tmp/spark-job-server/upload" }, # application.conf: 9 "bind-address" : "0.0.0.0", # /opt/spark-job-server/environment.conf: 65 # Timeouts for Spark context creation in seconds. In "yarn-client" mode, use # the first option, in all other modes, use the second one. If context # creation takes longer than the timeout, the jobserver closes the context. # Especially in yarn-client mode, context creation includes container # allocation, which can take a while. "yarn-context-creation-timeout" : "60 s", # /opt/spark-job-server/environment.conf: 66 "context-creation-timeout" : "60 s", # application.conf: 69 # The ask pattern timeout for Api "short-timeout" : "3 s", # /opt/spark-job-server/environment.conf: 58 # Number of jobs that can be run simultaneously per context. If not set, # defaults to number of cores on machine where Spark job server is running. "max-jobs-per-context" : 100, # /opt/spark-job-server/environment.conf: 36 # Class to use to persist data such as jars, applications, jobs, etc. # spark.jobserver.io.JobFileDAO uses the file system for persistence # spark.jobserver.io.JobSqlDAO uses an SQL database for persistence # "jobdao" : "spark.jobserver.io.JobSqlDAO", # /opt/spark-job-server/environment.conf: 30 # Directory where the job server stores uploaded jar files "jar-store-rootdir" : "/tmp/spark-job-server/jars", # application.conf: 24 "filedao" : { # application.conf: 25 "rootdir" : "/tmp/spark-jobserver/filedao/data" }, # application.conf: 15 # Number of job results to keep per JobResultActor/context "job-result-cache-size" : 5000, # /opt/spark-job-server/environment.conf: 71 # If true, a separate JVM is forked for each Spark context. # KERBEROS NOTE: In a Kerberos-enabled environment, you should set this to true # (as well as shiro authentication, see later in this file). "context-per-jvm" : true, # application.conf: 75 # Time out for job server to wait while creating named objects "named-object-creation-timeout" : "60 s" }, # merge of /opt/spark-job-server/environment.conf: 90,application.conf: 103 # Default settings for Spark contexts. These settings can be overridden on a # per-context basis. Please consult the Spark documentation for more details # on available settings. # Default settings for ad hoc as well as manually created contexts # You can add any Spark config params here, for example, spark.mesos.coarse = true "context-settings" : { # application.conf: 111 "streaming" : { # application.conf: 116 # if true, stops gracefully by waiting for the processing of all received data to be completed "stopGracefully" : true, # application.conf: 120 # if true, stops the SparkContext with the StreamingContext. The underlying SparkContext will be # stopped regardless of whether the StreamingContext has been started. 
"stopSparkContext" : true, # application.conf: 113 # Default batch interval for Spark Streaming contexts in milliseconds "batch_interval" : 1000 }, # application.conf: 127 # Timeout for SupervisorActor to wait for forked (separate JVM) contexts to initialize "context-init-timeout" : "60s", # application.conf: 109 # A zero-arg class implementing spark.jobserver.context.SparkContextFactory # Determines the type of jobs that can run in a SparkContext "context-factory" : "spark.jobserver.context.DefaultSparkContextFactory", # application.conf: 129 "passthrough" : { # application.conf: 130 "spark" : { # application.conf: 130 "driver" : { # application.conf: 130 "allowMultipleContexts" : true } } }, # /opt/spark-job-server/environment.conf: 106 # Required setting, that sets "spark.executor.memory" in the SparkConf. Sets # the amount of memory to use per Spark executor process, in the same format # as JVM memory strings (e.g. 512m, 2g). # # Note: In "yarn-client" mode, Spark will request this amount of memory per YARN # container, plus some additional overhead. "memory-per-node" : "1G", # /opt/spark-job-server/environment.conf: 98 # Required setting, that sets "spark.cores.max" in the SparkConf. This sets # the maximum amount of CPU cores to request for the Spark on the cluster # (not from each machine). # # IMPORTANT: Note that although required by job-server, this setting only # has an effect in Standalone and Mesos clusters. "num-cpu-cores" : 2 }, # /opt/spark-job-server/environment.conf: 82 # Sets the "spark.master" property in the SparkConf. KNIME recommends # "yarn-client" for production use and "local[4]" for debugging purposes. # master = "local[4]" # Run Spark locally with 4 worker threads "master" : "yarn-client", # system properties "dynamicAllocation" : { # system properties "enabled" : "true", # system properties "minExecutors" : "0", # system properties "executorIdleTimeout" : "60", # system properties "schedulerBacklogTimeout" : "1" }, # system properties "authenticate" : { # system properties "enableSaslEncryption" : "false" }, # system properties "sql" : { # system properties "queryExecutionListeners" : "com.cloudera.spark.lineage.ClouderaNavigatorListener" }, # system properties "shuffle" : { # system properties "encryption" : { # system properties "enabled" : "false" }, # system properties "service" : { # system properties "port" : "7337", # system properties "enabled" : "true" } }, # system properties "eventLog" : { # system properties "enabled" : "true", # system properties "dir" : "hdfs://ip-172-31-34-104.us-west-2.compute.internal:8020/user/spark/applicationHistory" }, # system properties "driver" : { # system properties "extraLibraryPath" : "/usr/lib/hadoop/lib/native", # system properties "extraJavaOptions" : "-XX:+UseConcMarkSweepGC\n -verbose:gc -XX:+PrintGCTimeStamps -Xloggc:/opt/spark-job-server/gc.out\n -XX:MaxPermSize=512m\n -XX:+CMSClassUnloadingEnabled -XX:MaxDirectMemorySize=512M -XX:+HeapDumpOnOutOfMemoryError -Djava.net.preferIPv4Stack=true -Dlog4j.configuration=file:/opt/spark-job-server/log4j-server.properties -DLOG_DIR=/var/log/spark-job-server -Dspark.executor.uri= ", # system properties "memory" : "2G" }, # application.conf: 93 # predefined Spark contexts # Below is an example, but do not uncomment it. Everything defined here is carried over to # deploy-time configs, so they will be created in all environments. 
:( "contexts" : {}, # system properties "extraListeners" : "com.cloudera.spark.lineage.ClouderaNavigatorListener", # system properties "yarn" : { # system properties "am" : { # system properties "extraLibraryPath" : "/usr/lib/hadoop/lib/native" }, # system properties "jar" : "local:/usr/lib/spark/lib/spark-assembly.jar", # system properties "historyServer" : { # system properties "allowTracking" : "true", # system properties "address" : "http://ip-172-31-34-104.us-west-2.compute.internal:18088" } }, # system properties "jars" : "file:/opt/spark-job-server/spark-job-server.jar" } [2018-03-29 16:35:57,198] INFO spark.jobserver.JobServer$ [] [] - Spray config: { # reference.conf: 1478 # After a `Timedout` message has been sent to the timeout handler and the # request still hasn't been completed after the time period set here # the server will complete the request itself with an error response. # Set to `infinite` to disable timeout timeouts. "timeout-timeout" : "2 s", # application.conf: 187 # ssl engine provider protocols "enabledProtocols" : [ # application.conf: 187 "SSLv3", # application.conf: 187 "TLSv1" ], # application.conf: 190 "pipelining-limit" : 2, # reference.conf: 1575 # The time period within which a connection handler must have been # registered after the bind handler has received a `Connected` event. # Set to `infinite` to disable. "registration-timeout" : "1s", # /opt/spark-job-server/environment.conf: 154 # parsing { # max-content-length = 200m # } "request-timeout" : "60 s", # reference.conf: 1600 "back-pressure" : { # reference.conf: 1604 # The reciprocal rate of requested Acks per NoAcks. E.g. the default value # '10' means that every 10th write request is acknowledged. This affects the # number of writes each connection has to buffer even in absence of back-pressure. "noack-rate" : 10, # reference.conf: 1609 # The lower limit the write queue size has to shrink to before reads are resumed. # Use 'infinite' to disable the low-watermark so that reading is resumed instantly # after the next successful write. "reading-low-watermark" : "infinite" }, # reference.conf: 1570 # The time period within which the TCP unbinding process must be completed. # Set to `infinite` to disable. "unbind-timeout" : "1s", # reference.conf: 1500 # Enables/disables support for statistics collection and querying. # Even though stats keeping overhead is small, # for maximum performance switch off when not needed. "stats-support" : "on", # reference.conf: 1490 # The path of the actor to send `spray.http.Timedout` messages to. # If empty all `Timedout` messages will go to the "regular" request # handling actor. "timeout-handler" : "", # /opt/spark-job-server/environment.conf: 155 "idle-timeout" : "120 s", # /opt/spark-job-server/environment.conf: 156 "request-chunk-aggregation-limit" : "200m", # reference.conf: 1449 # Enables/disables SSL encryption. # If enabled the server uses the implicit `ServerSSLEngineProvider` member # of the `Bind` command to create `SSLEngine` instances for the underlying # IO connection. "ssl-encryption" : "off", # application.conf: 192 # for maximum performance (prevents StopReading / ResumeReading messages to the IOBridge) # Needed for HTTP/1.0 requests with missing Host headers "default-host-header" : "spray.io:8765", # reference.conf: 1504 # Enables/disables the addition of a `Remote-Address` header # holding the clients (remote) IP address. 
"remote-address-header" : "off", # reference.conf: 1584 # The time after which a connection is aborted (RST) after a parsing error # occurred. The timeout prevents a connection which is already known to be # erroneous from receiving evermore data even if all of the data will be ignored. # However, in case of a connection abortion the client usually doesn't properly # receive the error response. This timeout is a trade-off which allows the client # some time to finish its request and receive a proper error response before the # connection is forcibly closed to free resources. "parsing-error-abort-timeout" : "2s", # merge of application.conf: 195,reference.conf: 1750 # The (default) configuration of the HTTP message parser for the server and # the client. # IMPORTANT: These settings (i.e. children of `spray.can.parsing`) can't be directly # overridden in `application.conf` to change the parser settings for client and server # altogether (see https://github.com/spray/spray/issues/346). Instead, override the # concrete settings beneath `spray.can.server.parsing` and `spray.can.client.parsing` # where these settings are copied to. "parsing" : { # reference.conf: 1758 "max-chunk-ext-length" : 256, # reference.conf: 1759 "max-chunk-size" : "1m", # application.conf: 195 # Increase this in order to upload bigger job jars "max-content-length" : "30m", # reference.conf: 1756 "max-header-count" : 64, # reference.conf: 1773 # Sets the strictness mode for parsing request target URIs. # The following values are defined: # # `strict`: RFC3986-compliant URIs are required, # a 400 response is triggered on violations # # `relaxed`: all visible 7-Bit ASCII chars are allowed # # `relaxed-with-raw-query`: like `relaxed` but additionally # the URI query is not parsed, but delivered as one raw string # as the `key` value of a single Query structure element. # "uri-parsing-mode" : "strict", # reference.conf: 1753 "max-response-reason-length" : 64, # reference.conf: 1783 # Enables/disables the logging of warning messages in case an incoming # message (request or response) contains an HTTP header which cannot be # parsed into its high-level model class due to incompatible syntax. # Note that, independently of this settings, spray will accept messages # with such headers as long as the message as a whole would still be legal # under the HTTP specification even without this header. # If a header cannot be parsed into a high-level model instance it will be # provided as a `RawHeader`. "illegal-header-warnings" : "on", # reference.conf: 1755 "max-header-value-length" : "8k", # reference.conf: 1787 # limits for the number of different values per header type that the # header cache will hold "header-cache" : { # reference.conf: 1792 "If-Modified-Since" : 0, # reference.conf: 1794 "If-Range" : 0, # reference.conf: 1788 "default" : 12, # reference.conf: 1790 "Date" : 0, # reference.conf: 1789 "Content-MD5" : 0, # reference.conf: 1796 "User-Agent" : 32, # reference.conf: 1795 "If-Unmodified-Since" : 0, # reference.conf: 1793 "If-None-Match" : 0, # reference.conf: 1791 "If-Match" : 0 }, # reference.conf: 1752 # The limits for the various parts of the HTTP message parser. "max-uri-length" : "2k", # reference.conf: 1754 "max-header-name-length" : 64, # reference.conf: 1807 # Enables/disables inclusion of an SSL-Session-Info header in parsed # messages over SSL transports (i.e., HttpRequest on server side and # HttpResponse on client side). 
"ssl-session-info-header" : "off", # reference.conf: 1802 # Sets the size starting from which incoming http-messages will be delivered # in chunks regardless of whether chunking is actually used on the wire. # Set to infinite to disable auto chunking. "incoming-auto-chunking-threshold-size" : "infinite" }, # reference.conf: 1566 # The time period within which the TCP binding process must be completed. # Set to `infinite` to disable. "bind-timeout" : "1s", # reference.conf: 1536 # Enables/disables the returning of more detailed error messages to # the client in the error response. # Should be disabled for browser-facing APIs due to the risk of XSS attacks # and (probably) enabled for internal or non-browser APIs. # Note that spray will always produce log messages containing the full # error details. "verbose-error-messages" : "off", # reference.conf: 1516 # Enables/disables automatic handling of HEAD requests. # If this setting is enabled the server dispatches HEAD requests as GET # requests to the application and automatically strips off all message # bodies from outgoing responses. # Note that, even when this setting is off the server will never send # out message bodies on responses to HEAD requests. "transparent-head-requests" : "on", # application.conf: 183 "keystoreType" : "JKS", # reference.conf: 1541 # Enables/disables the logging of the full (potentially multiple line) # error message to the server logs. # If disabled only a single line will be logged. "verbose-error-logging" : "off", # reference.conf: 1598 # Enables/disables automatic back-pressure handling by write buffering and # receive throttling "automatic-back-pressure-handling" : "on", # reference.conf: 1443-2058 # Always contains the deployed version of spray. # Referenced, for example, from the `spray.can.server.server-header` setting. "server-header" : "spray-can/1.2.3", # application.conf: 185 # key manager factory provider "provider" : "SunX509", # reference.conf: 1613 # Enables more verbose DEBUG logging for debugging SSL related issues. "ssl-tracing" : "off", # reference.conf: 1562 # For HTTPS connections this setting specified the maximum number of # bytes that are encrypted in one go. Large responses are broken down in # chunks of this size so as to already begin sending before the response has # been encrypted entirely. "max-encryption-chunk-size" : "1m", # reference.conf: 1508 # Enables/disables the addition of a `Raw-Request-URI` header holding the # original raw request URI as the client has sent it. "raw-request-uri-header" : "off", # application.conf: 182 # see http://docs.oracle.com/javase/7/docs/technotes/guides/security/StandardNames.html#SSLContext for more examples # typical are either SSL or TLS "encryptionType" : "SSL", # reference.conf: 1485 # The period during which a service must respond to a `ChunkedRequestStart` message # with a `RegisterChunkHandler` message. During the registration period reading from # the network is suspended. It is still possible that some chunks have already been # received which will be buffered until the registration is received or the timeout is # triggered. If the timeout is triggered the connection is immediately aborted. "chunkhandler-registration-timeout" : "500 ms", # reference.conf: 1556 # The initial size if the buffer to render the response headers in. # Can be used for fine-tuning response rendering performance but probably # doesn't have to be fiddled with in most applications. 
"response-header-size-hint" : 512, # reference.conf: 1528 # Enables/disables an alternative response streaming mode that doesn't # use `Transfer-Encoding: chunked` but rather renders the individual # MessageChunks coming in from the application as parts of the original # response entity. # Enabling this mode causes all connections to be closed after a streaming # response has been finished since there is no other way to signal the # response end to the client. # Note that chunkless-streaming is implicitly enabled when streaming # responses to HTTP/1.0 clients (since they don't support # `Transfer-Encoding: chunked`) "chunkless-streaming" : "off", # reference.conf: 1495 # The "granularity" of timeout checking for both idle connections timeouts # as well as request timeouts, should rarely be needed to modify. # If set to `infinite` request and connection timeout checking is disabled. "reaping-cycle" : "250 ms" } [2018-03-29 16:35:57,773] INFO ka.event.slf4j.Slf4jLogger [] [] - Slf4jLogger started [2018-03-29 16:35:57,920] INFO Remoting [] [Remoting] - Starting remoting [2018-03-29 16:35:58,342] INFO Remoting [] [Remoting] - Remoting started; listening on addresses :[akka.tcp://JobServer@127.0.0.1:34411] [2018-03-29 16:35:58,345] INFO Remoting [] [Remoting] - Remoting now listens on addresses: [akka.tcp://JobServer@127.0.0.1:34411] [2018-03-29 16:35:58,395] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:34411] - Starting up... [2018-03-29 16:35:58,635] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:34411] - Registered cluster JMX MBean [akka:type=Cluster] [2018-03-29 16:35:58,635] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:34411] - Started up successfully [2018-03-29 16:35:58,671] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:34411] - No seed-nodes configured, manual cluster join required [2018-03-29 16:36:00,479] INFO ark.jobserver.io.JobSqlDAO [] [] - rootDir is /tmp/spark-job-server/sqldao/data [2018-03-29 16:36:00,631] INFO ternal.util.VersionPrinter [] [] - Flyway 3.2.1 by Boxfuse [2018-03-29 16:36:01,636] INFO dbsupport.DbSupportFactory [] [] - Database: jdbc:h2:/tmp/spark-job-server/sqldao/data/h2-db (H2 1.3) [2018-03-29 16:36:01,980] INFO nternal.command.DbValidate [] [] - Validated 1 migration (execution time 00:00.138s) [2018-03-29 16:36:02,094] INFO atatable.MetaDataTableImpl [] [] - Creating Metadata table: "PUBLIC"."schema_version" [2018-03-29 16:36:02,141] INFO internal.command.DbMigrate [] [] - Current version of schema "PUBLIC": << Empty Schema >> [2018-03-29 16:36:02,141] INFO internal.command.DbMigrate [] [] - Migrating schema "PUBLIC" to version 0.7.0 - init tables [2018-03-29 16:36:02,192] INFO internal.command.DbMigrate [] [] - Successfully applied 1 migration to schema "PUBLIC" (execution time 00:00.102s). [2018-03-29 16:36:02,280] INFO ark.jobserver.io.JobSqlDAO [] [] - Running dummy query to initialize dbcp. 
[2018-03-29 16:36:03,020] INFO k.jobserver.io.JobDAOActor [] [akka://JobServer/user/dao-manager] - Starting actor spark.jobserver.io.JobDAOActor [2018-03-29 16:36:03,045] INFO jobserver.DataManagerActor [] [akka://JobServer/user/data-manager] - Starting actor spark.jobserver.DataManagerActor [2018-03-29 16:36:03,046] INFO spark.jobserver.JarManager [] [akka://JobServer/user/jar-manager] - Starting actor spark.jobserver.JarManager [2018-03-29 16:36:03,052] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting actor spark.jobserver.AkkaClusterSupervisorActor [2018-03-29 16:36:03,068] INFO ark.jobserver.JobInfoActor [] [akka://JobServer/user/job-info] - Starting actor spark.jobserver.JobInfoActor [2018-03-29 16:36:03,073] INFO AkkaClusterSupervisorActor [] [] - AkkaClusterSupervisor initialized on akka.tcp://JobServer@127.0.0.1:34411 [2018-03-29 16:36:03,073] INFO k.jobserver.JobResultActor [] [akka://JobServer/user/context-supervisor/global-result-actor] - Starting actor spark.jobserver.JobResultActor [2018-03-29 16:36:03,163] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:34411] - Node [akka.tcp://JobServer@127.0.0.1:34411] is JOINING, roles [supervisor] [2018-03-29 16:36:04,038] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:34411] - Leader is moving node [akka.tcp://JobServer@127.0.0.1:34411] to [Up] [2018-03-29 16:36:04,355] INFO spark.jobserver.WebApi [] [] - No authentication. [2018-03-29 16:36:04,760] INFO spark.jobserver.WebApi [] [] - Starting browser web service... [2018-03-29 16:36:05,894] INFO ay.can.server.HttpListener [] [akka://JobServer/user/IO-HTTP/listener-0] - Bound to /0.0.0.0:8090 [2018-03-29 16:36:05,897] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [akka.io.Tcp$Bound] from Actor[akka://JobServer/user/IO-HTTP/listener-0#-1177804449] to Actor[akka://JobServer/deadLetters] was not delivered. [1] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. 
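WebApi reports "No authentication." here, and the earlier KERBEROS NOTE on context-per-jvm points to Shiro authentication for secured setups. A sketch of what such a block might look like; the exact keys are not shown in this dump, so treat both the key names and the path as assumptions to check against this build's configuration template:

    shiro {
      # enable Apache Shiro based authentication for the HTTP API (assumed key)
      authentication = on
      # ini file describing users and realms (illustrative path)
      config.path = "/opt/spark-job-server/shiro.ini"
    }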
[2018-03-29 16:54:59,735] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting context with actor name jobManager-ce-a535-ded6fc05490e [2018-03-29 16:54:59,742] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Created working directory /var/log/spark-job-server/jobserver-knimeSparkContext4715518819510323665 for context knimeSparkContext [2018-03-29 16:54:59,753] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting to execute sub process [./manager_start.sh, /var/log/spark-job-server/jobserver-knimeSparkContext4715518819510323665, akka.tcp://JobServer@127.0.0.1:34411] [2018-03-29 16:55:01,759] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:34411] - Node [akka.tcp://JobServer@127.0.0.1:40538] is JOINING, roles [manager] [2018-03-29 16:55:02,008] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:34411] - Leader is moving node [akka.tcp://JobServer@127.0.0.1:40538] to [Up] [2018-03-29 16:55:02,137] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Received identify response, attempting to initialize context at akka.tcp://JobServer@127.0.0.1:40538/user/* [2018-03-29 16:55:02,141] INFO k.jobserver.JobResultActor [] [akka://JobServer/user/context-supervisor/$a] - Starting actor spark.jobserver.JobResultActor [2018-03-29 16:55:12,602] INFO AkkaClusterSupervisorActor [] [] - SparkContext knimeSparkContext joined [2018-03-29 16:55:12,737] INFO spark.jobserver.JarManager [] [akka://JobServer/user/jar-manager] - Storing jar for app knimeJobs_013620532e91f948_65dffd0366301f75b3c2dabe274a502a4f448c88_spark-1.6.cdh5_9, 398149 bytes [2018-03-29 16:55:13,223] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [spark.jobserver.JobInfoActor$JobConfigStored$] from Actor[akka://JobServer/user/job-info#347904157] to Actor[akka://JobServer/deadLetters] was not delivered. [2] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. [2018-03-29 16:55:14,688] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [spark.jobserver.JobInfoActor$JobConfigStored$] from Actor[akka://JobServer/user/job-info#347904157] to Actor[akka://JobServer/deadLetters] was not delivered. [3] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. [2018-03-29 16:55:58,033] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [spark.jobserver.JobInfoActor$JobConfigStored$] from Actor[akka://JobServer/user/job-info#347904157] to Actor[akka://JobServer/deadLetters] was not delivered. [4] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. [2018-03-29 16:56:32,422] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [spark.jobserver.JobInfoActor$JobConfigStored$] from Actor[akka://JobServer/user/job-info#347904157] to Actor[akka://JobServer/deadLetters] was not delivered. [5] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. 
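In this run roughly 13 seconds pass between "Starting to execute sub process [./manager_start.sh, ...]" and "SparkContext knimeSparkContext joined", comfortably inside the configured 60 s limits. If YARN container allocation were slower, the three timeouts from the dumped config could be raised in /opt/spark-job-server/environment.conf; a sketch with illustrative values:

    spark {
      jobserver {
        # first value applies in yarn-client mode, second in all other modes
        yarn-context-creation-timeout = "120 s"
        context-creation-timeout = "120 s"
      }
      context-settings {
        # how long the supervisor waits for a forked (separate JVM) context to report back
        context-init-timeout = "120 s"
      }
    }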
[2018-03-29 16:56:47,324] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [spark.jobserver.JobInfoActor$JobConfigStored$] from Actor[akka://JobServer/user/job-info#347904157] to Actor[akka://JobServer/deadLetters] was not delivered. [6] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. [2018-03-29 16:57:01,948] INFO jobserver.DataManagerActor [] [akka://JobServer/user/data-manager] - Storing data in file prefix mysql-connector-java-5.1.39-bin.jar, 989497 bytes [2018-03-29 16:57:02,041] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [spark.jobserver.JobInfoActor$JobConfigStored$] from Actor[akka://JobServer/user/job-info#347904157] to Actor[akka://JobServer/deadLetters] was not delivered. [7] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. [2018-03-29 16:57:08,738] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [spark.jobserver.JobInfoActor$JobConfigStored$] from Actor[akka://JobServer/user/job-info#347904157] to Actor[akka://JobServer/deadLetters] was not delivered. [8] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. [2018-03-29 16:57:31,859] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [spark.jobserver.JobInfoActor$JobConfigStored$] from Actor[akka://JobServer/user/job-info#347904157] to Actor[akka://JobServer/deadLetters] was not delivered. [9] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. [2018-03-29 16:57:41,678] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [spark.jobserver.JobInfoActor$JobConfigStored$] from Actor[akka://JobServer/user/job-info#347904157] to Actor[akka://JobServer/deadLetters] was not delivered. [10] dead letters encountered, no more dead letters will be logged. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. [2018-03-29 17:08:54,841] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Actor terminated: jobManager-ce-a535-ded6fc05490e [2018-03-29 17:08:54,848] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:34411] - Marking node [akka.tcp://JobServer@127.0.0.1:40538] as [Down] [2018-03-29 17:08:54,880] ERROR akka.remote.EndpointWriter [] [akka://JobServer/system/endpointManager/reliableEndpointWriter-akka.tcp%3A%2F%2FJobServer%40127.0.0.1%3A40538-0/endpointWriter] - AssociationError [akka.tcp://JobServer@127.0.0.1:34411] <- [akka.tcp://JobServer@127.0.0.1:40538]: Error [Shut down address: akka.tcp://JobServer@127.0.0.1:40538] [ akka.remote.ShutDownAssociation: Shut down address: akka.tcp://JobServer@127.0.0.1:40538 Caused by: akka.remote.transport.Transport$InvalidAssociationException: The remote system terminated the association because it is shutting down. 
] [2018-03-29 17:08:54,998] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:34411] - Leader is removing unreachable node [akka.tcp://JobServer@127.0.0.1:40538] [2018-03-29 17:08:54,999] INFO Remoting [] [Remoting] - Address [akka.tcp://JobServer@127.0.0.1:40538] is now quarantined, all messages to this address will be delivered to dead letters. [2018-03-29 17:22:13,102] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting context with actor name jobManager-cd-85d0-a0881152bdb3 [2018-03-29 17:22:13,102] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Created working directory /var/log/spark-job-server/jobserver-knimeSparkContext5679751244115998684 for context knimeSparkContext [2018-03-29 17:22:13,103] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting to execute sub process [./manager_start.sh, /var/log/spark-job-server/jobserver-knimeSparkContext5679751244115998684, akka.tcp://JobServer@127.0.0.1:34411] [2018-03-29 17:22:15,214] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:34411] - Node [akka.tcp://JobServer@127.0.0.1:36220] is JOINING, roles [manager] [2018-03-29 17:22:16,008] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:34411] - Leader is moving node [akka.tcp://JobServer@127.0.0.1:36220] to [Up] [2018-03-29 17:22:16,081] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Received identify response, attempting to initialize context at akka.tcp://JobServer@127.0.0.1:36220/user/* [2018-03-29 17:22:16,081] INFO k.jobserver.JobResultActor [] [akka://JobServer/user/context-supervisor/$b] - Starting actor spark.jobserver.JobResultActor [2018-03-29 17:22:25,117] INFO AkkaClusterSupervisorActor [] [] - SparkContext knimeSparkContext joined [2018-03-29 17:23:07,900] INFO jobserver.DataManagerActor [] [akka://JobServer/user/data-manager] - Storing data in file prefix mysql-connector-java-5.1.39-bin.jar, 989497 bytes [2018-03-29 17:38:52,126] INFO jobserver.DataManagerActor [] [akka://JobServer/user/data-manager] - Storing data in file prefix mysql-connector-java-5.1.39-bin.jar, 989497 bytes [2018-03-29 17:59:55,287] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Actor terminated: jobManager-cd-85d0-a0881152bdb3 [2018-03-29 17:59:55,287] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:34411] - Marking node [akka.tcp://JobServer@127.0.0.1:36220] as [Down] [2018-03-29 17:59:55,310] ERROR akka.remote.EndpointWriter [] [akka://JobServer/system/endpointManager/reliableEndpointWriter-akka.tcp%3A%2F%2FJobServer%40127.0.0.1%3A36220-1/endpointWriter] - AssociationError [akka.tcp://JobServer@127.0.0.1:34411] <- [akka.tcp://JobServer@127.0.0.1:36220]: Error [Shut down address: akka.tcp://JobServer@127.0.0.1:36220] [ akka.remote.ShutDownAssociation: Shut down address: akka.tcp://JobServer@127.0.0.1:36220 Caused by: akka.remote.transport.Transport$InvalidAssociationException: The remote system terminated the association because it is shutting down. 
] BUILD-VERSION: Commit 0abe49552a6fcdf6a5aca565d624cda255342c33 on branch 0.6.2-KNIME_cdh-5.13 built on 2017-12-06 18:06:16+01:00 [2018-03-30 08:51:45,970] INFO spark.jobserver.JobServer$ [] [] - Starting JobServer with config { # system properties "ui" : { # system properties "enabled" : "true", # system properties "killEnabled" : "true" }, # system properties "app" : { # system properties "name" : "spark.jobserver.JobServer" }, # application.conf: 5 # spark web UI port "webUrlPort" : 8080, # system properties "submit" : { # system properties "deployMode" : "client" }, # system properties "serializer" : "org.apache.spark.serializer.KryoSerializer", # system properties "lineage" : { # system properties "enabled" : "false", # system properties "log" : { # system properties "dir" : "/var/log/spark/lineage" } }, # system properties "executor" : { # system properties "extraLibraryPath" : "/usr/lib/hadoop/lib/native", # system properties "uri" : "", # system properties "extraJavaOptions" : "-Dlog4j.configuration=file:/opt/spark-job-server/log4j-server.properties -DLOG_DIR=/var/log/spark-job-server" }, # merge of /opt/spark-job-server/environment.conf: 25,application.conf: 7 # ######################################################################### # Job server settings # ######################################################################### "jobserver" : { # /opt/spark-job-server/environment.conf: 27 # TCP port that the job server listens on for HTTP requests "port" : 8090, # merge of /opt/spark-job-server/environment.conf: 37,application.conf: 40 "sqldao" : { # /opt/spark-job-server/environment.conf: 38 "rootdir" : "/tmp/spark-job-server/sqldao/data", # application.conf: 42 # Slick database driver, full classpath "slick-driver" : "scala.slick.driver.H2Driver", # /opt/spark-job-server/environment.conf: 41 "dbcp" : { # /opt/spark-job-server/environment.conf: 45 "maxidle" : 10, # /opt/spark-job-server/environment.conf: 43 "maxactive" : 20, # /opt/spark-job-server/environment.conf: 42 "connectonstart" : true, # /opt/spark-job-server/environment.conf: 44 "minidle" : 1, # /opt/spark-job-server/environment.conf: 46 "initialsize" : 10 }, # application.conf: 45 # JDBC driver, full classpath "jdbc-driver" : "org.h2.Driver", # merge of /opt/spark-job-server/environment.conf: 39,application.conf: 52 # Full JDBC URL / init string, along with username and password. Sorry, needs to match above. # Substitutions may be used to launch job-server, but leave it out here in the default or tests won't pass "jdbc" : { # application.conf: 55 "password" : "", # application.conf: 54 "user" : "", # /opt/spark-job-server/environment.conf: 39 "url" : "jdbc:h2:/tmp/spark-job-server/sqldao/data/h2-db;AUTO_SERVER=TRUE;AUTO_RECONNECT=TRUE" } }, # application.conf: 87 # spark broadcst factory in yarn deployment # Versions prior to 1.1.0, spark default broadcast factory is org.apache.spark.broadcast.HttpBroadcastFactory. # Can't start multiple sparkContexts in the same JVM with HttpBroadcastFactory. "yarn-broadcast-factory" : "org.apache.spark.broadcast.TorrentBroadcastFactory", # /opt/spark-job-server/environment.conf: 52 # Storage directory for files that are uploaded to the server # via POST/data commands "datadao" : { # /opt/spark-job-server/environment.conf: 53 "rootdir" : "/tmp/spark-job-server/upload" }, # application.conf: 9 "bind-address" : "0.0.0.0", # /opt/spark-job-server/environment.conf: 65 # Timeouts for Spark context creation in seconds. 
In "yarn-client" mode, use # the first option, in all other modes, use the second one. If context # creation takes longer than the timeout, the jobserver closes the context. # Especially in yarn-client mode, context creation includes container # allocation, which can take a while. "yarn-context-creation-timeout" : "60 s", # /opt/spark-job-server/environment.conf: 66 "context-creation-timeout" : "60 s", # application.conf: 69 # The ask pattern timeout for Api "short-timeout" : "3 s", # /opt/spark-job-server/environment.conf: 58 # Number of jobs that can be run simultaneously per context. If not set, # defaults to number of cores on machine where Spark job server is running. "max-jobs-per-context" : 100, # /opt/spark-job-server/environment.conf: 36 # Class to use to persist data such as jars, applications, jobs, etc. # spark.jobserver.io.JobFileDAO uses the file system for persistence # spark.jobserver.io.JobSqlDAO uses an SQL database for persistence # "jobdao" : "spark.jobserver.io.JobSqlDAO", # /opt/spark-job-server/environment.conf: 30 # Directory where the job server stores uploaded jar files "jar-store-rootdir" : "/tmp/spark-job-server/jars", # application.conf: 24 "filedao" : { # application.conf: 25 "rootdir" : "/tmp/spark-jobserver/filedao/data" }, # application.conf: 15 # Number of job results to keep per JobResultActor/context "job-result-cache-size" : 5000, # /opt/spark-job-server/environment.conf: 71 # If true, a separate JVM is forked for each Spark context. # KERBEROS NOTE: In a Kerberos-enabled environment, you should set this to true # (as well as shiro authentication, see later in this file). "context-per-jvm" : true, # application.conf: 75 # Time out for job server to wait while creating named objects "named-object-creation-timeout" : "60 s" }, # merge of /opt/spark-job-server/environment.conf: 90,application.conf: 103 # Default settings for Spark contexts. These settings can be overridden on a # per-context basis. Please consult the Spark documentation for more details # on available settings. # Default settings for ad hoc as well as manually created contexts # You can add any Spark config params here, for example, spark.mesos.coarse = true "context-settings" : { # application.conf: 111 "streaming" : { # application.conf: 116 # if true, stops gracefully by waiting for the processing of all received data to be completed "stopGracefully" : true, # application.conf: 120 # if true, stops the SparkContext with the StreamingContext. The underlying SparkContext will be # stopped regardless of whether the StreamingContext has been started. "stopSparkContext" : true, # application.conf: 113 # Default batch interval for Spark Streaming contexts in milliseconds "batch_interval" : 1000 }, # application.conf: 127 # Timeout for SupervisorActor to wait for forked (separate JVM) contexts to initialize "context-init-timeout" : "60s", # application.conf: 109 # A zero-arg class implementing spark.jobserver.context.SparkContextFactory # Determines the type of jobs that can run in a SparkContext "context-factory" : "spark.jobserver.context.DefaultSparkContextFactory", # application.conf: 129 "passthrough" : { # application.conf: 130 "spark" : { # application.conf: 130 "driver" : { # application.conf: 130 "allowMultipleContexts" : true } } }, # /opt/spark-job-server/environment.conf: 106 # Required setting, that sets "spark.executor.memory" in the SparkConf. Sets # the amount of memory to use per Spark executor process, in the same format # as JVM memory strings (e.g. 512m, 2g). 
# # Note: In "yarn-client" mode, Spark will request this amount of memory per YARN # container, plus some additional overhead. "memory-per-node" : "1G", # /opt/spark-job-server/environment.conf: 98 # Required setting, that sets "spark.cores.max" in the SparkConf. This sets # the maximum amount of CPU cores to request for the Spark on the cluster # (not from each machine). # # IMPORTANT: Note that although required by job-server, this setting only # has an effect in Standalone and Mesos clusters. "num-cpu-cores" : 2 }, # /opt/spark-job-server/environment.conf: 82 # Sets the "spark.master" property in the SparkConf. KNIME recommends # "yarn-client" for production use and "local[4]" for debugging purposes. # master = "local[4]" # Run Spark locally with 4 worker threads "master" : "yarn-client", # system properties "dynamicAllocation" : { # system properties "enabled" : "true", # system properties "minExecutors" : "0", # system properties "executorIdleTimeout" : "60", # system properties "schedulerBacklogTimeout" : "1" }, # system properties "authenticate" : { # system properties "enableSaslEncryption" : "false" }, # system properties "sql" : { # system properties "queryExecutionListeners" : "com.cloudera.spark.lineage.ClouderaNavigatorListener" }, # system properties "shuffle" : { # system properties "encryption" : { # system properties "enabled" : "false" }, # system properties "service" : { # system properties "port" : "7337", # system properties "enabled" : "true" } }, # system properties "eventLog" : { # system properties "enabled" : "true", # system properties "dir" : "hdfs://ip-172-31-34-104.us-west-2.compute.internal:8020/user/spark/applicationHistory" }, # system properties "driver" : { # system properties "extraLibraryPath" : "/usr/lib/hadoop/lib/native", # system properties "extraJavaOptions" : "-XX:+UseConcMarkSweepGC\n -verbose:gc -XX:+PrintGCTimeStamps -Xloggc:/opt/spark-job-server/gc.out\n -XX:MaxPermSize=512m\n -XX:+CMSClassUnloadingEnabled -XX:MaxDirectMemorySize=512M -XX:+HeapDumpOnOutOfMemoryError -Djava.net.preferIPv4Stack=true -Dlog4j.configuration=file:/opt/spark-job-server/log4j-server.properties -DLOG_DIR=/var/log/spark-job-server -Dspark.executor.uri= ", # system properties "memory" : "2G" }, # application.conf: 93 # predefined Spark contexts # Below is an example, but do not uncomment it. Everything defined here is carried over to # deploy-time configs, so they will be created in all environments. :( "contexts" : {}, # system properties "extraListeners" : "com.cloudera.spark.lineage.ClouderaNavigatorListener", # system properties "yarn" : { # system properties "am" : { # system properties "extraLibraryPath" : "/usr/lib/hadoop/lib/native" }, # system properties "jar" : "local:/usr/lib/spark/lib/spark-assembly.jar", # system properties "historyServer" : { # system properties "allowTracking" : "true", # system properties "address" : "http://ip-172-31-34-104.us-west-2.compute.internal:18088" } }, # system properties "jars" : "file:/opt/spark-job-server/spark-job-server.jar" } [2018-03-30 08:51:45,973] INFO spark.jobserver.JobServer$ [] [] - Spray config: { # reference.conf: 1478 # After a `Timedout` message has been sent to the timeout handler and the # request still hasn't been completed after the time period set here # the server will complete the request itself with an error response. # Set to `infinite` to disable timeout timeouts. 
"timeout-timeout" : "2 s", # application.conf: 187 # ssl engine provider protocols "enabledProtocols" : [ # application.conf: 187 "SSLv3", # application.conf: 187 "TLSv1" ], # application.conf: 190 "pipelining-limit" : 2, # reference.conf: 1575 # The time period within which a connection handler must have been # registered after the bind handler has received a `Connected` event. # Set to `infinite` to disable. "registration-timeout" : "1s", # /opt/spark-job-server/environment.conf: 154 # parsing { # max-content-length = 200m # } "request-timeout" : "60 s", # reference.conf: 1600 "back-pressure" : { # reference.conf: 1604 # The reciprocal rate of requested Acks per NoAcks. E.g. the default value # '10' means that every 10th write request is acknowledged. This affects the # number of writes each connection has to buffer even in absence of back-pressure. "noack-rate" : 10, # reference.conf: 1609 # The lower limit the write queue size has to shrink to before reads are resumed. # Use 'infinite' to disable the low-watermark so that reading is resumed instantly # after the next successful write. "reading-low-watermark" : "infinite" }, # reference.conf: 1570 # The time period within which the TCP unbinding process must be completed. # Set to `infinite` to disable. "unbind-timeout" : "1s", # reference.conf: 1500 # Enables/disables support for statistics collection and querying. # Even though stats keeping overhead is small, # for maximum performance switch off when not needed. "stats-support" : "on", # reference.conf: 1490 # The path of the actor to send `spray.http.Timedout` messages to. # If empty all `Timedout` messages will go to the "regular" request # handling actor. "timeout-handler" : "", # /opt/spark-job-server/environment.conf: 155 "idle-timeout" : "120 s", # /opt/spark-job-server/environment.conf: 156 "request-chunk-aggregation-limit" : "200m", # reference.conf: 1449 # Enables/disables SSL encryption. # If enabled the server uses the implicit `ServerSSLEngineProvider` member # of the `Bind` command to create `SSLEngine` instances for the underlying # IO connection. "ssl-encryption" : "off", # application.conf: 192 # for maximum performance (prevents StopReading / ResumeReading messages to the IOBridge) # Needed for HTTP/1.0 requests with missing Host headers "default-host-header" : "spray.io:8765", # reference.conf: 1504 # Enables/disables the addition of a `Remote-Address` header # holding the clients (remote) IP address. "remote-address-header" : "off", # reference.conf: 1584 # The time after which a connection is aborted (RST) after a parsing error # occurred. The timeout prevents a connection which is already known to be # erroneous from receiving evermore data even if all of the data will be ignored. # However, in case of a connection abortion the client usually doesn't properly # receive the error response. This timeout is a trade-off which allows the client # some time to finish its request and receive a proper error response before the # connection is forcibly closed to free resources. "parsing-error-abort-timeout" : "2s", # merge of application.conf: 195,reference.conf: 1750 # The (default) configuration of the HTTP message parser for the server and # the client. # IMPORTANT: These settings (i.e. children of `spray.can.parsing`) can't be directly # overridden in `application.conf` to change the parser settings for client and server # altogether (see https://github.com/spray/spray/issues/346). 
Instead, override the # concrete settings beneath `spray.can.server.parsing` and `spray.can.client.parsing` # where these settings are copied to. "parsing" : { # reference.conf: 1758 "max-chunk-ext-length" : 256, # reference.conf: 1759 "max-chunk-size" : "1m", # application.conf: 195 # Increase this in order to upload bigger job jars "max-content-length" : "30m", # reference.conf: 1756 "max-header-count" : 64, # reference.conf: 1773 # Sets the strictness mode for parsing request target URIs. # The following values are defined: # # `strict`: RFC3986-compliant URIs are required, # a 400 response is triggered on violations # # `relaxed`: all visible 7-Bit ASCII chars are allowed # # `relaxed-with-raw-query`: like `relaxed` but additionally # the URI query is not parsed, but delivered as one raw string # as the `key` value of a single Query structure element. # "uri-parsing-mode" : "strict", # reference.conf: 1753 "max-response-reason-length" : 64, # reference.conf: 1783 # Enables/disables the logging of warning messages in case an incoming # message (request or response) contains an HTTP header which cannot be # parsed into its high-level model class due to incompatible syntax. # Note that, independently of this settings, spray will accept messages # with such headers as long as the message as a whole would still be legal # under the HTTP specification even without this header. # If a header cannot be parsed into a high-level model instance it will be # provided as a `RawHeader`. "illegal-header-warnings" : "on", # reference.conf: 1755 "max-header-value-length" : "8k", # reference.conf: 1787 # limits for the number of different values per header type that the # header cache will hold "header-cache" : { # reference.conf: 1792 "If-Modified-Since" : 0, # reference.conf: 1794 "If-Range" : 0, # reference.conf: 1788 "default" : 12, # reference.conf: 1790 "Date" : 0, # reference.conf: 1789 "Content-MD5" : 0, # reference.conf: 1796 "User-Agent" : 32, # reference.conf: 1795 "If-Unmodified-Since" : 0, # reference.conf: 1793 "If-None-Match" : 0, # reference.conf: 1791 "If-Match" : 0 }, # reference.conf: 1752 # The limits for the various parts of the HTTP message parser. "max-uri-length" : "2k", # reference.conf: 1754 "max-header-name-length" : 64, # reference.conf: 1807 # Enables/disables inclusion of an SSL-Session-Info header in parsed # messages over SSL transports (i.e., HttpRequest on server side and # HttpResponse on client side). "ssl-session-info-header" : "off", # reference.conf: 1802 # Sets the size starting from which incoming http-messages will be delivered # in chunks regardless of whether chunking is actually used on the wire. # Set to infinite to disable auto chunking. "incoming-auto-chunking-threshold-size" : "infinite" }, # reference.conf: 1566 # The time period within which the TCP binding process must be completed. # Set to `infinite` to disable. "bind-timeout" : "1s", # reference.conf: 1536 # Enables/disables the returning of more detailed error messages to # the client in the error response. # Should be disabled for browser-facing APIs due to the risk of XSS attacks # and (probably) enabled for internal or non-browser APIs. # Note that spray will always produce log messages containing the full # error details. "verbose-error-messages" : "off", # reference.conf: 1516 # Enables/disables automatic handling of HEAD requests. 
# If this setting is enabled the server dispatches HEAD requests as GET # requests to the application and automatically strips off all message # bodies from outgoing responses. # Note that, even when this setting is off the server will never send # out message bodies on responses to HEAD requests. "transparent-head-requests" : "on", # application.conf: 183 "keystoreType" : "JKS", # reference.conf: 1541 # Enables/disables the logging of the full (potentially multiple line) # error message to the server logs. # If disabled only a single line will be logged. "verbose-error-logging" : "off", # reference.conf: 1598 # Enables/disables automatic back-pressure handling by write buffering and # receive throttling "automatic-back-pressure-handling" : "on", # reference.conf: 1443-2058 # Always contains the deployed version of spray. # Referenced, for example, from the `spray.can.server.server-header` setting. "server-header" : "spray-can/1.2.3", # application.conf: 185 # key manager factory provider "provider" : "SunX509", # reference.conf: 1613 # Enables more verbose DEBUG logging for debugging SSL related issues. "ssl-tracing" : "off", # reference.conf: 1562 # For HTTPS connections this setting specified the maximum number of # bytes that are encrypted in one go. Large responses are broken down in # chunks of this size so as to already begin sending before the response has # been encrypted entirely. "max-encryption-chunk-size" : "1m", # reference.conf: 1508 # Enables/disables the addition of a `Raw-Request-URI` header holding the # original raw request URI as the client has sent it. "raw-request-uri-header" : "off", # application.conf: 182 # see http://docs.oracle.com/javase/7/docs/technotes/guides/security/StandardNames.html#SSLContext for more examples # typical are either SSL or TLS "encryptionType" : "SSL", # reference.conf: 1485 # The period during which a service must respond to a `ChunkedRequestStart` message # with a `RegisterChunkHandler` message. During the registration period reading from # the network is suspended. It is still possible that some chunks have already been # received which will be buffered until the registration is received or the timeout is # triggered. If the timeout is triggered the connection is immediately aborted. "chunkhandler-registration-timeout" : "500 ms", # reference.conf: 1556 # The initial size if the buffer to render the response headers in. # Can be used for fine-tuning response rendering performance but probably # doesn't have to be fiddled with in most applications. "response-header-size-hint" : 512, # reference.conf: 1528 # Enables/disables an alternative response streaming mode that doesn't # use `Transfer-Encoding: chunked` but rather renders the individual # MessageChunks coming in from the application as parts of the original # response entity. # Enabling this mode causes all connections to be closed after a streaming # response has been finished since there is no other way to signal the # response end to the client. # Note that chunkless-streaming is implicitly enabled when streaming # responses to HTTP/1.0 clients (since they don't support # `Transfer-Encoding: chunked`) "chunkless-streaming" : "off", # reference.conf: 1495 # The "granularity" of timeout checking for both idle connections timeouts # as well as request timeouts, should rarely be needed to modify. # If set to `infinite` request and connection timeout checking is disabled. 
"reaping-cycle" : "250 ms" } [2018-03-30 08:51:46,547] INFO ka.event.slf4j.Slf4jLogger [] [] - Slf4jLogger started [2018-03-30 08:51:46,816] INFO Remoting [] [Remoting] - Starting remoting [2018-03-30 08:51:47,197] INFO Remoting [] [Remoting] - Remoting started; listening on addresses :[akka.tcp://JobServer@127.0.0.1:33301] [2018-03-30 08:51:47,197] INFO Remoting [] [Remoting] - Remoting now listens on addresses: [akka.tcp://JobServer@127.0.0.1:33301] [2018-03-30 08:51:47,242] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Starting up... [2018-03-30 08:51:47,574] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Registered cluster JMX MBean [akka:type=Cluster] [2018-03-30 08:51:47,574] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Started up successfully [2018-03-30 08:51:47,604] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - No seed-nodes configured, manual cluster join required [2018-03-30 08:51:49,215] INFO ark.jobserver.io.JobSqlDAO [] [] - rootDir is /tmp/spark-job-server/sqldao/data [2018-03-30 08:51:49,346] INFO ternal.util.VersionPrinter [] [] - Flyway 3.2.1 by Boxfuse [2018-03-30 08:51:50,224] INFO dbsupport.DbSupportFactory [] [] - Database: jdbc:h2:/tmp/spark-job-server/sqldao/data/h2-db (H2 1.3) [2018-03-30 08:51:50,774] INFO nternal.command.DbValidate [] [] - Validated 1 migration (execution time 00:00.300s) [2018-03-30 08:51:50,788] INFO atatable.MetaDataTableImpl [] [] - Creating Metadata table: "PUBLIC"."schema_version" [2018-03-30 08:51:50,806] INFO internal.command.DbMigrate [] [] - Current version of schema "PUBLIC": << Empty Schema >> [2018-03-30 08:51:50,806] INFO internal.command.DbMigrate [] [] - Migrating schema "PUBLIC" to version 0.7.0 - init tables [2018-03-30 08:51:50,827] INFO internal.command.DbMigrate [] [] - Successfully applied 1 migration to schema "PUBLIC" (execution time 00:00.041s). [2018-03-30 08:51:50,872] INFO ark.jobserver.io.JobSqlDAO [] [] - Running dummy query to initialize dbcp. 
[2018-03-30 08:51:52,369] INFO k.jobserver.io.JobDAOActor [] [akka://JobServer/user/dao-manager] - Starting actor spark.jobserver.io.JobDAOActor [2018-03-30 08:51:52,387] INFO jobserver.DataManagerActor [] [akka://JobServer/user/data-manager] - Starting actor spark.jobserver.DataManagerActor [2018-03-30 08:51:52,388] INFO spark.jobserver.JarManager [] [akka://JobServer/user/jar-manager] - Starting actor spark.jobserver.JarManager [2018-03-30 08:51:52,405] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting actor spark.jobserver.AkkaClusterSupervisorActor [2018-03-30 08:51:52,418] INFO ark.jobserver.JobInfoActor [] [akka://JobServer/user/job-info] - Starting actor spark.jobserver.JobInfoActor [2018-03-30 08:51:52,481] INFO AkkaClusterSupervisorActor [] [] - AkkaClusterSupervisor initialized on akka.tcp://JobServer@127.0.0.1:33301 [2018-03-30 08:51:52,482] INFO k.jobserver.JobResultActor [] [akka://JobServer/user/context-supervisor/global-result-actor] - Starting actor spark.jobserver.JobResultActor [2018-03-30 08:51:52,513] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Node [akka.tcp://JobServer@127.0.0.1:33301] is JOINING, roles [supervisor] [2018-03-30 08:51:53,132] INFO spark.jobserver.WebApi [] [] - No authentication. [2018-03-30 08:51:53,336] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Leader is moving node [akka.tcp://JobServer@127.0.0.1:33301] to [Up] [2018-03-30 08:51:53,893] INFO spark.jobserver.WebApi [] [] - Starting browser web service... [2018-03-30 08:51:57,292] INFO ay.can.server.HttpListener [] [akka://JobServer/user/IO-HTTP/listener-0] - Bound to /0.0.0.0:8090 [2018-03-30 08:51:57,294] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [akka.io.Tcp$Bound] from Actor[akka://JobServer/user/IO-HTTP/listener-0#2137194332] to Actor[akka://JobServer/deadLetters] was not delivered. [1] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. 
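The dead-letter warnings in this log (the Tcp$Bound acknowledgement above and the JobConfigStored$ messages further down) appear to be acknowledgements without a registered recipient rather than real failures. As the log message itself points out, they can be silenced via the standard Akka settings it names; a minimal sketch, assuming the snippet would be merged into the job server's environment.conf:

    akka {
      # both settings are named in the log message above; "off" disables the warnings entirely
      log-dead-letters = off
      log-dead-letters-during-shutdown = off
    }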
[2018-03-30 09:02:59,564] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting context with actor name jobManager-87-a297-ec76544c80aa [2018-03-30 09:02:59,571] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Created working directory /var/log/spark-job-server/jobserver-knimeSparkContext8247401068800592347 for context knimeSparkContext [2018-03-30 09:02:59,588] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting to execute sub process [./manager_start.sh, /var/log/spark-job-server/jobserver-knimeSparkContext8247401068800592347, akka.tcp://JobServer@127.0.0.1:33301] [2018-03-30 09:03:02,017] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Node [akka.tcp://JobServer@127.0.0.1:40561] is JOINING, roles [manager] [2018-03-30 09:03:02,288] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Leader is moving node [akka.tcp://JobServer@127.0.0.1:40561] to [Up] [2018-03-30 09:03:02,375] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Received identify response, attempting to initialize context at akka.tcp://JobServer@127.0.0.1:40561/user/* [2018-03-30 09:03:02,377] INFO k.jobserver.JobResultActor [] [akka://JobServer/user/context-supervisor/$a] - Starting actor spark.jobserver.JobResultActor [2018-03-30 09:03:13,350] INFO AkkaClusterSupervisorActor [] [] - SparkContext knimeSparkContext joined [2018-03-30 09:03:13,472] INFO spark.jobserver.JarManager [] [akka://JobServer/user/jar-manager] - Storing jar for app knimeJobs_013620532e91f948_c1d68d31852ba93bbe21b51883e769ded6594a8e_spark-1.6.cdh5_9, 398148 bytes [2018-03-30 09:03:14,105] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [spark.jobserver.JobInfoActor$JobConfigStored$] from Actor[akka://JobServer/user/job-info#-1236280388] to Actor[akka://JobServer/deadLetters] was not delivered. [2] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. [2018-03-30 09:05:03,996] INFO jobserver.DataManagerActor [] [akka://JobServer/user/data-manager] - Storing data in file prefix mysql-connector-java-5.1.39-bin.jar, 989497 bytes [2018-03-30 09:05:04,206] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [spark.jobserver.JobInfoActor$JobConfigStored$] from Actor[akka://JobServer/user/job-info#-1236280388] to Actor[akka://JobServer/deadLetters] was not delivered. [3] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. [2018-03-30 09:05:31,880] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [spark.jobserver.JobInfoActor$JobConfigStored$] from Actor[akka://JobServer/user/job-info#-1236280388] to Actor[akka://JobServer/deadLetters] was not delivered. [4] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. [2018-03-30 09:05:41,438] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [spark.jobserver.JobInfoActor$JobConfigStored$] from Actor[akka://JobServer/user/job-info#-1236280388] to Actor[akka://JobServer/deadLetters] was not delivered. 
[5] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. [2018-03-30 09:05:51,195] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [spark.jobserver.JobInfoActor$JobConfigStored$] from Actor[akka://JobServer/user/job-info#-1236280388] to Actor[akka://JobServer/deadLetters] was not delivered. [6] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. [2018-03-30 09:05:52,641] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [spark.jobserver.JobInfoActor$JobConfigStored$] from Actor[akka://JobServer/user/job-info#-1236280388] to Actor[akka://JobServer/deadLetters] was not delivered. [7] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. [2018-03-30 09:05:54,010] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [spark.jobserver.JobInfoActor$JobConfigStored$] from Actor[akka://JobServer/user/job-info#-1236280388] to Actor[akka://JobServer/deadLetters] was not delivered. [8] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. [2018-03-30 09:05:55,397] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [spark.jobserver.JobInfoActor$JobConfigStored$] from Actor[akka://JobServer/user/job-info#-1236280388] to Actor[akka://JobServer/deadLetters] was not delivered. [9] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. [2018-03-30 09:05:55,703] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [spark.jobserver.JobInfoActor$JobConfigStored$] from Actor[akka://JobServer/user/job-info#-1236280388] to Actor[akka://JobServer/deadLetters] was not delivered. [10] dead letters encountered, no more dead letters will be logged. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. [2018-03-30 09:33:08,561] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Shutting down context knimeSparkContext [2018-03-30 09:33:08,562] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Marking node [akka.tcp://JobServer@127.0.0.1:40561] as [Down] [2018-03-30 09:33:08,753] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Actor terminated: jobManager-87-a297-ec76544c80aa [2018-03-30 09:33:08,800] ERROR akka.remote.EndpointWriter [] [akka://JobServer/system/endpointManager/reliableEndpointWriter-akka.tcp%3A%2F%2FJobServer%40127.0.0.1%3A40561-0/endpointWriter] - AssociationError [akka.tcp://JobServer@127.0.0.1:33301] <- [akka.tcp://JobServer@127.0.0.1:40561]: Error [Shut down address: akka.tcp://JobServer@127.0.0.1:40561] [ akka.remote.ShutDownAssociation: Shut down address: akka.tcp://JobServer@127.0.0.1:40561 Caused by: akka.remote.transport.Transport$InvalidAssociationException: The remote system terminated the association because it is shutting down. 
] [2018-03-30 09:33:09,286] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Leader is removing unreachable node [akka.tcp://JobServer@127.0.0.1:40561] [2018-03-30 09:33:09,288] INFO Remoting [] [Remoting] - Address [akka.tcp://JobServer@127.0.0.1:40561] is now quarantined, all messages to this address will be delivered to dead letters. [2018-03-30 09:33:11,830] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting context with actor name jobManager-08-bc22-25723e5a14a3 [2018-03-30 09:33:11,831] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Created working directory /var/log/spark-job-server/jobserver-knimeSparkContext2167216448998548019 for context knimeSparkContext [2018-03-30 09:33:11,832] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting to execute sub process [./manager_start.sh, /var/log/spark-job-server/jobserver-knimeSparkContext2167216448998548019, akka.tcp://JobServer@127.0.0.1:33301] [2018-03-30 09:33:14,110] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Node [akka.tcp://JobServer@127.0.0.1:33273] is JOINING, roles [manager] [2018-03-30 09:33:14,287] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Leader is moving node [akka.tcp://JobServer@127.0.0.1:33273] to [Up] [2018-03-30 09:33:14,335] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Received identify response, attempting to initialize context at akka.tcp://JobServer@127.0.0.1:33273/user/* [2018-03-30 09:33:14,335] INFO k.jobserver.JobResultActor [] [akka://JobServer/user/context-supervisor/$b] - Starting actor spark.jobserver.JobResultActor [2018-03-30 09:33:17,714] INFO AkkaClusterSupervisorActor [] [] - Failed to initialize context Actor[akka.tcp://JobServer@127.0.0.1:33273/user/jobManager-08-bc22-25723e5a14a3#-1951245385] java.lang.IllegalArgumentException: Required executor memory (4096+409 MB) is above the max threshold (2048 MB) of this cluster! Please check the values of 'yarn.scheduler.maximum-allocation-mb' and/or 'yarn.nodemanager.resource.memory-mb'. 
at org.apache.spark.deploy.yarn.Client.verifyClusterResources(Client.scala:281) at org.apache.spark.deploy.yarn.Client.submitApplication(Client.scala:140) at org.apache.spark.scheduler.cluster.YarnClientSchedulerBackend.start(YarnClientSchedulerBackend.scala:57) at org.apache.spark.scheduler.TaskSchedulerImpl.start(TaskSchedulerImpl.scala:151) at org.apache.spark.SparkContext.(SparkContext.scala:538) at spark.jobserver.context.DefaultSparkContextFactory$$anon$1.(SparkContextFactory.scala:53) at spark.jobserver.context.DefaultSparkContextFactory.makeContext(SparkContextFactory.scala:53) at spark.jobserver.context.DefaultSparkContextFactory.makeContext(SparkContextFactory.scala:48) at spark.jobserver.context.SparkContextFactory$class.makeContext(SparkContextFactory.scala:37) at spark.jobserver.context.DefaultSparkContextFactory.makeContext(SparkContextFactory.scala:48) at spark.jobserver.JobManagerActor.createContextFromConfig(JobManagerActor.scala:386) at spark.jobserver.JobManagerActor$$anonfun$wrappedReceive$1.applyOrElse(JobManagerActor.scala:129) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25) at ooyala.common.akka.ActorStack$$anonfun$receive$1.applyOrElse(ActorStack.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25) at ooyala.common.akka.Slf4jLogging$$anonfun$receive$1$$anonfun$applyOrElse$1.apply$mcV$sp(Slf4jLogging.scala:26) at ooyala.common.akka.Slf4jLogging$class.ooyala$common$akka$Slf4jLogging$$withAkkaSourceLogging(Slf4jLogging.scala:35) at ooyala.common.akka.Slf4jLogging$$anonfun$receive$1.applyOrElse(Slf4jLogging.scala:25) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25) at ooyala.common.akka.ActorMetrics$$anonfun$receive$1.applyOrElse(ActorMetrics.scala:24) at akka.actor.ActorCell.receiveMessage(ActorCell.scala:498) at akka.actor.ActorCell.invoke(ActorCell.scala:456) at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:237) at akka.dispatch.Mailbox.run(Mailbox.scala:219) at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:386) at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260) at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339) at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979) at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107) [2018-03-30 09:33:17,716] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Marking node [akka.tcp://JobServer@127.0.0.1:33273] as [Down] [2018-03-30 09:33:17,798] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Ignoring received gossip status from unreachable [UniqueAddress(akka.tcp://JobServer@127.0.0.1:33273,1323099352)] [2018-03-30 09:33:17,878] ERROR akka.remote.EndpointWriter [] 
[akka://JobServer/system/endpointManager/reliableEndpointWriter-akka.tcp%3A%2F%2FJobServer%40127.0.0.1%3A33273-1/endpointWriter] - AssociationError [akka.tcp://JobServer@127.0.0.1:33301] <- [akka.tcp://JobServer@127.0.0.1:33273]: Error [Shut down address: akka.tcp://JobServer@127.0.0.1:33273] [ akka.remote.ShutDownAssociation: Shut down address: akka.tcp://JobServer@127.0.0.1:33273 Caused by: akka.remote.transport.Transport$InvalidAssociationException: The remote system terminated the association because it is shutting down. ] [2018-03-30 09:33:18,287] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Leader is removing unreachable node [akka.tcp://JobServer@127.0.0.1:33273] [2018-03-30 09:33:18,287] INFO Remoting [] [Remoting] - Address [akka.tcp://JobServer@127.0.0.1:33273] is now quarantined, all messages to this address will be delivered to dead letters. [2018-03-30 09:33:38,954] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting context with actor name jobManager-9e-91ad-229bd79e9951 [2018-03-30 09:33:38,954] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Created working directory /var/log/spark-job-server/jobserver-knimeSparkContext927681163955068355 for context knimeSparkContext [2018-03-30 09:33:38,954] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting to execute sub process [./manager_start.sh, /var/log/spark-job-server/jobserver-knimeSparkContext927681163955068355, akka.tcp://JobServer@127.0.0.1:33301] [2018-03-30 09:33:41,157] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Node [akka.tcp://JobServer@127.0.0.1:36982] is JOINING, roles [manager] [2018-03-30 09:33:41,288] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Leader is moving node [akka.tcp://JobServer@127.0.0.1:36982] to [Up] [2018-03-30 09:33:41,369] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Received identify response, attempting to initialize context at akka.tcp://JobServer@127.0.0.1:36982/user/* [2018-03-30 09:33:41,370] INFO k.jobserver.JobResultActor [] [akka://JobServer/user/context-supervisor/$c] - Starting actor spark.jobserver.JobResultActor [2018-03-30 09:33:44,793] INFO AkkaClusterSupervisorActor [] [] - Failed to initialize context Actor[akka.tcp://JobServer@127.0.0.1:36982/user/jobManager-9e-91ad-229bd79e9951#-252943171] java.lang.IllegalArgumentException: Required executor memory (2048+384 MB) is above the max threshold (2048 MB) of this cluster! Please check the values of 'yarn.scheduler.maximum-allocation-mb' and/or 'yarn.nodemanager.resource.memory-mb'. 
at org.apache.spark.deploy.yarn.Client.verifyClusterResources(Client.scala:281) at org.apache.spark.deploy.yarn.Client.submitApplication(Client.scala:140) at org.apache.spark.scheduler.cluster.YarnClientSchedulerBackend.start(YarnClientSchedulerBackend.scala:57) at org.apache.spark.scheduler.TaskSchedulerImpl.start(TaskSchedulerImpl.scala:151) at org.apache.spark.SparkContext.(SparkContext.scala:538) at spark.jobserver.context.DefaultSparkContextFactory$$anon$1.(SparkContextFactory.scala:53) at spark.jobserver.context.DefaultSparkContextFactory.makeContext(SparkContextFactory.scala:53) at spark.jobserver.context.DefaultSparkContextFactory.makeContext(SparkContextFactory.scala:48) at spark.jobserver.context.SparkContextFactory$class.makeContext(SparkContextFactory.scala:37) at spark.jobserver.context.DefaultSparkContextFactory.makeContext(SparkContextFactory.scala:48) at spark.jobserver.JobManagerActor.createContextFromConfig(JobManagerActor.scala:386) at spark.jobserver.JobManagerActor$$anonfun$wrappedReceive$1.applyOrElse(JobManagerActor.scala:129) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25) at ooyala.common.akka.ActorStack$$anonfun$receive$1.applyOrElse(ActorStack.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25) at ooyala.common.akka.Slf4jLogging$$anonfun$receive$1$$anonfun$applyOrElse$1.apply$mcV$sp(Slf4jLogging.scala:26) at ooyala.common.akka.Slf4jLogging$class.ooyala$common$akka$Slf4jLogging$$withAkkaSourceLogging(Slf4jLogging.scala:35) at ooyala.common.akka.Slf4jLogging$$anonfun$receive$1.applyOrElse(Slf4jLogging.scala:25) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25) at ooyala.common.akka.ActorMetrics$$anonfun$receive$1.applyOrElse(ActorMetrics.scala:24) at akka.actor.ActorCell.receiveMessage(ActorCell.scala:498) at akka.actor.ActorCell.invoke(ActorCell.scala:456) at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:237) at akka.dispatch.Mailbox.run(Mailbox.scala:219) at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:386) at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260) at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339) at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979) at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107) [2018-03-30 09:33:44,795] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Marking node [akka.tcp://JobServer@127.0.0.1:36982] as [Down] [2018-03-30 09:33:44,880] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Ignoring received gossip status from unreachable [UniqueAddress(akka.tcp://JobServer@127.0.0.1:36982,-239340040)] [2018-03-30 09:33:44,930] ERROR akka.remote.EndpointWriter [] 
[akka://JobServer/system/endpointManager/reliableEndpointWriter-akka.tcp%3A%2F%2FJobServer%40127.0.0.1%3A36982-2/endpointWriter] - AssociationError [akka.tcp://JobServer@127.0.0.1:33301] <- [akka.tcp://JobServer@127.0.0.1:36982]: Error [Shut down address: akka.tcp://JobServer@127.0.0.1:36982] [ akka.remote.ShutDownAssociation: Shut down address: akka.tcp://JobServer@127.0.0.1:36982 Caused by: akka.remote.transport.Transport$InvalidAssociationException: The remote system terminated the association because it is shutting down. ] [2018-03-30 09:33:45,287] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Leader is removing unreachable node [akka.tcp://JobServer@127.0.0.1:36982] [2018-03-30 09:33:45,287] INFO Remoting [] [Remoting] - Address [akka.tcp://JobServer@127.0.0.1:36982] is now quarantined, all messages to this address will be delivered to dead letters. [2018-03-30 09:34:08,199] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting context with actor name jobManager-0f-a04f-6c530fff0dab [2018-03-30 09:34:08,199] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Created working directory /var/log/spark-job-server/jobserver-knimeSparkContext4521952086951316902 for context knimeSparkContext [2018-03-30 09:34:08,200] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting to execute sub process [./manager_start.sh, /var/log/spark-job-server/jobserver-knimeSparkContext4521952086951316902, akka.tcp://JobServer@127.0.0.1:33301] [2018-03-30 09:34:10,489] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Node [akka.tcp://JobServer@127.0.0.1:40727] is JOINING, roles [manager] [2018-03-30 09:34:11,288] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Leader is moving node [akka.tcp://JobServer@127.0.0.1:40727] to [Up] [2018-03-30 09:34:11,367] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Received identify response, attempting to initialize context at akka.tcp://JobServer@127.0.0.1:40727/user/* [2018-03-30 09:34:11,368] INFO k.jobserver.JobResultActor [] [akka://JobServer/user/context-supervisor/$d] - Starting actor spark.jobserver.JobResultActor [2018-03-30 09:34:43,511] INFO AkkaClusterSupervisorActor [] [] - SparkContext knimeSparkContext joined [2018-03-30 09:35:22,142] INFO jobserver.DataManagerActor [] [akka://JobServer/user/data-manager] - Storing data in file prefix mysql-connector-java-5.1.39-bin.jar, 989497 bytes [2018-03-30 09:55:23,252] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Shutting down context knimeSparkContext [2018-03-30 09:55:23,262] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Marking node [akka.tcp://JobServer@127.0.0.1:40727] as [Down] [2018-03-30 09:55:23,287] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Leader is removing unreachable node [akka.tcp://JobServer@127.0.0.1:40727] [2018-03-30 09:55:23,287] INFO Remoting [] [Remoting] - Address [akka.tcp://JobServer@127.0.0.1:40727] is now quarantined, all messages to this address will be delivered to dead letters. 
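Every "Failed to initialize context" in this log fails the same YARN check: the requested executor memory plus the overhead Spark adds on top of it must fit into one YARN container. With the Spark 1.6 default spark.yarn.executor.memoryOverhead of max(384 MB, 10% of the executor memory), the figures match the log exactly (4096 → +409, 2048 → +384, 6144 → +614, 5120 → +512), and the later attempts still fail after the container limit is raised (4096+409 > 4096, 6144+614 > 5120, 5120+512 > 5120) because the overhead has to fit inside yarn.scheduler.maximum-allocation-mb as well. The fix is either to raise that limit (and yarn.nodemanager.resource.memory-mb) on the cluster, or to request less executor memory for the KNIME context. A minimal sketch of the latter, assuming the job server's context-settings block uses the usual spark-jobserver memory-per-node key (the key itself is not visible in this dump):

    context-settings {
      # 1536m + max(384m, 10% of 1536m) = 1920m, which fits the 2048 MB
      # container limit reported in the first failures (illustrative value)
      memory-per-node = 1536m
    }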
[2018-03-30 09:55:23,289] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Actor terminated: jobManager-0f-a04f-6c530fff0dab [2018-03-30 09:55:27,029] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting context with actor name jobManager-4d-b765-d0a8bc54b423 [2018-03-30 09:55:27,030] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Created working directory /var/log/spark-job-server/jobserver-knimeSparkContext1662908792660397644 for context knimeSparkContext [2018-03-30 09:55:27,030] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting to execute sub process [./manager_start.sh, /var/log/spark-job-server/jobserver-knimeSparkContext1662908792660397644, akka.tcp://JobServer@127.0.0.1:33301] [2018-03-30 09:55:29,163] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Node [akka.tcp://JobServer@127.0.0.1:46414] is JOINING, roles [manager] [2018-03-30 09:55:29,287] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Leader is moving node [akka.tcp://JobServer@127.0.0.1:46414] to [Up] [2018-03-30 09:55:29,324] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Received identify response, attempting to initialize context at akka.tcp://JobServer@127.0.0.1:46414/user/* [2018-03-30 09:55:29,324] INFO k.jobserver.JobResultActor [] [akka://JobServer/user/context-supervisor/$e] - Starting actor spark.jobserver.JobResultActor [2018-03-30 09:56:01,165] INFO AkkaClusterSupervisorActor [] [] - SparkContext knimeSparkContext joined [2018-03-30 09:56:02,571] INFO jobserver.DataManagerActor [] [akka://JobServer/user/data-manager] - Storing data in file prefix mysql-connector-java-5.1.39-bin.jar, 989497 bytes [2018-03-30 10:32:46,181] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Actor terminated: jobManager-4d-b765-d0a8bc54b423 [2018-03-30 10:32:46,181] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Marking node [akka.tcp://JobServer@127.0.0.1:46414] as [Down] [2018-03-30 10:32:46,200] ERROR akka.remote.EndpointWriter [] [akka://JobServer/system/endpointManager/reliableEndpointWriter-akka.tcp%3A%2F%2FJobServer%40127.0.0.1%3A46414-4/endpointWriter] - AssociationError [akka.tcp://JobServer@127.0.0.1:33301] <- [akka.tcp://JobServer@127.0.0.1:46414]: Error [Shut down address: akka.tcp://JobServer@127.0.0.1:46414] [ akka.remote.ShutDownAssociation: Shut down address: akka.tcp://JobServer@127.0.0.1:46414 Caused by: akka.remote.transport.Transport$InvalidAssociationException: The remote system terminated the association because it is shutting down. ] [2018-03-30 10:32:46,276] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Leader is removing unreachable node [akka.tcp://JobServer@127.0.0.1:46414] [2018-03-30 10:32:46,276] INFO Remoting [] [Remoting] - Address [akka.tcp://JobServer@127.0.0.1:46414] is now quarantined, all messages to this address will be delivered to dead letters. 
[2018-03-30 10:36:06,127] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting context with actor name jobManager-e7-8c10-b056172cd0d1 [2018-03-30 10:36:06,128] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Created working directory /var/log/spark-job-server/jobserver-knimeSparkContext9164809762133744550 for context knimeSparkContext [2018-03-30 10:36:06,128] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting to execute sub process [./manager_start.sh, /var/log/spark-job-server/jobserver-knimeSparkContext9164809762133744550, akka.tcp://JobServer@127.0.0.1:33301] [2018-03-30 10:36:08,349] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Node [akka.tcp://JobServer@127.0.0.1:35456] is JOINING, roles [manager] [2018-03-30 10:36:09,276] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Leader is moving node [akka.tcp://JobServer@127.0.0.1:35456] to [Up] [2018-03-30 10:36:09,345] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Received identify response, attempting to initialize context at akka.tcp://JobServer@127.0.0.1:35456/user/* [2018-03-30 10:36:09,346] INFO k.jobserver.JobResultActor [] [akka://JobServer/user/context-supervisor/$f] - Starting actor spark.jobserver.JobResultActor [2018-03-30 10:36:12,906] INFO AkkaClusterSupervisorActor [] [] - Failed to initialize context Actor[akka.tcp://JobServer@127.0.0.1:35456/user/jobManager-e7-8c10-b056172cd0d1#2010597085] java.lang.IllegalArgumentException: Required executor memory (4096+409 MB) is above the max threshold (4096 MB) of this cluster! Please check the values of 'yarn.scheduler.maximum-allocation-mb' and/or 'yarn.nodemanager.resource.memory-mb'. 
at org.apache.spark.deploy.yarn.Client.verifyClusterResources(Client.scala:281) at org.apache.spark.deploy.yarn.Client.submitApplication(Client.scala:140) at org.apache.spark.scheduler.cluster.YarnClientSchedulerBackend.start(YarnClientSchedulerBackend.scala:57) at org.apache.spark.scheduler.TaskSchedulerImpl.start(TaskSchedulerImpl.scala:151) at org.apache.spark.SparkContext.(SparkContext.scala:538) at spark.jobserver.context.DefaultSparkContextFactory$$anon$1.(SparkContextFactory.scala:53) at spark.jobserver.context.DefaultSparkContextFactory.makeContext(SparkContextFactory.scala:53) at spark.jobserver.context.DefaultSparkContextFactory.makeContext(SparkContextFactory.scala:48) at spark.jobserver.context.SparkContextFactory$class.makeContext(SparkContextFactory.scala:37) at spark.jobserver.context.DefaultSparkContextFactory.makeContext(SparkContextFactory.scala:48) at spark.jobserver.JobManagerActor.createContextFromConfig(JobManagerActor.scala:386) at spark.jobserver.JobManagerActor$$anonfun$wrappedReceive$1.applyOrElse(JobManagerActor.scala:129) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25) at ooyala.common.akka.ActorStack$$anonfun$receive$1.applyOrElse(ActorStack.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25) at ooyala.common.akka.Slf4jLogging$$anonfun$receive$1$$anonfun$applyOrElse$1.apply$mcV$sp(Slf4jLogging.scala:26) at ooyala.common.akka.Slf4jLogging$class.ooyala$common$akka$Slf4jLogging$$withAkkaSourceLogging(Slf4jLogging.scala:35) at ooyala.common.akka.Slf4jLogging$$anonfun$receive$1.applyOrElse(Slf4jLogging.scala:25) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25) at ooyala.common.akka.ActorMetrics$$anonfun$receive$1.applyOrElse(ActorMetrics.scala:24) at akka.actor.ActorCell.receiveMessage(ActorCell.scala:498) at akka.actor.ActorCell.invoke(ActorCell.scala:456) at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:237) at akka.dispatch.Mailbox.run(Mailbox.scala:219) at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:386) at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260) at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339) at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979) at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107) [2018-03-30 10:36:12,907] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Marking node [akka.tcp://JobServer@127.0.0.1:35456] as [Down] [2018-03-30 10:36:13,069] ERROR akka.remote.EndpointWriter [] [akka://JobServer/system/endpointManager/reliableEndpointWriter-akka.tcp%3A%2F%2FJobServer%40127.0.0.1%3A35456-5/endpointWriter] - AssociationError [akka.tcp://JobServer@127.0.0.1:33301] <- [akka.tcp://JobServer@127.0.0.1:35456]: Error [Shut down address: 
akka.tcp://JobServer@127.0.0.1:35456] [ akka.remote.ShutDownAssociation: Shut down address: akka.tcp://JobServer@127.0.0.1:35456 Caused by: akka.remote.transport.Transport$InvalidAssociationException: The remote system terminated the association because it is shutting down. ] [2018-03-30 10:36:13,287] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Leader is removing unreachable node [akka.tcp://JobServer@127.0.0.1:35456] [2018-03-30 10:36:13,287] INFO Remoting [] [Remoting] - Address [akka.tcp://JobServer@127.0.0.1:35456] is now quarantined, all messages to this address will be delivered to dead letters. [2018-03-30 10:36:30,132] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting context with actor name jobManager-41-baa1-b12c24fec192 [2018-03-30 10:36:30,132] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Created working directory /var/log/spark-job-server/jobserver-knimeSparkContext1037638572896644536 for context knimeSparkContext [2018-03-30 10:36:30,133] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting to execute sub process [./manager_start.sh, /var/log/spark-job-server/jobserver-knimeSparkContext1037638572896644536, akka.tcp://JobServer@127.0.0.1:33301] [2018-03-30 10:36:32,248] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Node [akka.tcp://JobServer@127.0.0.1:34776] is JOINING, roles [manager] [2018-03-30 10:36:32,286] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Leader is moving node [akka.tcp://JobServer@127.0.0.1:34776] to [Up] [2018-03-30 10:36:32,427] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Received identify response, attempting to initialize context at akka.tcp://JobServer@127.0.0.1:34776/user/* [2018-03-30 10:36:32,427] INFO k.jobserver.JobResultActor [] [akka://JobServer/user/context-supervisor/$g] - Starting actor spark.jobserver.JobResultActor [2018-03-30 10:37:04,751] INFO AkkaClusterSupervisorActor [] [] - SparkContext knimeSparkContext joined [2018-03-30 10:37:06,106] INFO jobserver.DataManagerActor [] [akka://JobServer/user/data-manager] - Storing data in file prefix mysql-connector-java-5.1.39-bin.jar, 989497 bytes [2018-03-30 10:40:07,870] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Shutting down context knimeSparkContext [2018-03-30 10:40:07,870] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Marking node [akka.tcp://JobServer@127.0.0.1:34776] as [Down] [2018-03-30 10:40:07,980] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Ignoring received gossip status from unreachable [UniqueAddress(akka.tcp://JobServer@127.0.0.1:34776,-1438638123)] [2018-03-30 10:40:08,101] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Actor terminated: jobManager-41-baa1-b12c24fec192 [2018-03-30 10:40:08,111] ERROR akka.remote.EndpointWriter [] [akka://JobServer/system/endpointManager/reliableEndpointWriter-akka.tcp%3A%2F%2FJobServer%40127.0.0.1%3A34776-6/endpointWriter] - AssociationError [akka.tcp://JobServer@127.0.0.1:33301] <- [akka.tcp://JobServer@127.0.0.1:34776]: Error [Shut down address: 
akka.tcp://JobServer@127.0.0.1:34776] [ akka.remote.ShutDownAssociation: Shut down address: akka.tcp://JobServer@127.0.0.1:34776 Caused by: akka.remote.transport.Transport$InvalidAssociationException: The remote system terminated the association because it is shutting down. ] [2018-03-30 10:40:08,276] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Leader is removing unreachable node [akka.tcp://JobServer@127.0.0.1:34776] [2018-03-30 10:40:08,277] INFO Remoting [] [Remoting] - Address [akka.tcp://JobServer@127.0.0.1:34776] is now quarantined, all messages to this address will be delivered to dead letters. [2018-03-30 10:40:10,900] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting context with actor name jobManager-f0-bb86-fd53fab83ac5 [2018-03-30 10:40:10,901] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Created working directory /var/log/spark-job-server/jobserver-knimeSparkContext5705795253431020135 for context knimeSparkContext [2018-03-30 10:40:10,901] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting to execute sub process [./manager_start.sh, /var/log/spark-job-server/jobserver-knimeSparkContext5705795253431020135, akka.tcp://JobServer@127.0.0.1:33301] [2018-03-30 10:40:13,020] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Node [akka.tcp://JobServer@127.0.0.1:38946] is JOINING, roles [manager] [2018-03-30 10:40:13,287] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Leader is moving node [akka.tcp://JobServer@127.0.0.1:38946] to [Up] [2018-03-30 10:40:13,339] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Received identify response, attempting to initialize context at akka.tcp://JobServer@127.0.0.1:38946/user/* [2018-03-30 10:40:13,339] INFO k.jobserver.JobResultActor [] [akka://JobServer/user/context-supervisor/$h] - Starting actor spark.jobserver.JobResultActor [2018-03-30 10:40:16,883] INFO AkkaClusterSupervisorActor [] [] - Failed to initialize context Actor[akka.tcp://JobServer@127.0.0.1:38946/user/jobManager-f0-bb86-fd53fab83ac5#1026636867] java.lang.IllegalArgumentException: Required executor memory (4096+409 MB) is above the max threshold (4096 MB) of this cluster! Please check the values of 'yarn.scheduler.maximum-allocation-mb' and/or 'yarn.nodemanager.resource.memory-mb'. 
at org.apache.spark.deploy.yarn.Client.verifyClusterResources(Client.scala:281) at org.apache.spark.deploy.yarn.Client.submitApplication(Client.scala:140) at org.apache.spark.scheduler.cluster.YarnClientSchedulerBackend.start(YarnClientSchedulerBackend.scala:57) at org.apache.spark.scheduler.TaskSchedulerImpl.start(TaskSchedulerImpl.scala:151) at org.apache.spark.SparkContext.(SparkContext.scala:538) at spark.jobserver.context.DefaultSparkContextFactory$$anon$1.(SparkContextFactory.scala:53) at spark.jobserver.context.DefaultSparkContextFactory.makeContext(SparkContextFactory.scala:53) at spark.jobserver.context.DefaultSparkContextFactory.makeContext(SparkContextFactory.scala:48) at spark.jobserver.context.SparkContextFactory$class.makeContext(SparkContextFactory.scala:37) at spark.jobserver.context.DefaultSparkContextFactory.makeContext(SparkContextFactory.scala:48) at spark.jobserver.JobManagerActor.createContextFromConfig(JobManagerActor.scala:386) at spark.jobserver.JobManagerActor$$anonfun$wrappedReceive$1.applyOrElse(JobManagerActor.scala:129) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25) at ooyala.common.akka.ActorStack$$anonfun$receive$1.applyOrElse(ActorStack.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25) at ooyala.common.akka.Slf4jLogging$$anonfun$receive$1$$anonfun$applyOrElse$1.apply$mcV$sp(Slf4jLogging.scala:26) at ooyala.common.akka.Slf4jLogging$class.ooyala$common$akka$Slf4jLogging$$withAkkaSourceLogging(Slf4jLogging.scala:35) at ooyala.common.akka.Slf4jLogging$$anonfun$receive$1.applyOrElse(Slf4jLogging.scala:25) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25) at ooyala.common.akka.ActorMetrics$$anonfun$receive$1.applyOrElse(ActorMetrics.scala:24) at akka.actor.ActorCell.receiveMessage(ActorCell.scala:498) at akka.actor.ActorCell.invoke(ActorCell.scala:456) at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:237) at akka.dispatch.Mailbox.run(Mailbox.scala:219) at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:386) at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260) at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339) at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979) at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107) [2018-03-30 10:40:16,884] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Marking node [akka.tcp://JobServer@127.0.0.1:38946] as [Down] [2018-03-30 10:40:17,031] ERROR akka.remote.EndpointWriter [] [akka://JobServer/system/endpointManager/reliableEndpointWriter-akka.tcp%3A%2F%2FJobServer%40127.0.0.1%3A38946-7/endpointWriter] - AssociationError [akka.tcp://JobServer@127.0.0.1:33301] <- [akka.tcp://JobServer@127.0.0.1:38946]: Error [Shut down address: 
akka.tcp://JobServer@127.0.0.1:38946] [ akka.remote.ShutDownAssociation: Shut down address: akka.tcp://JobServer@127.0.0.1:38946 Caused by: akka.remote.transport.Transport$InvalidAssociationException: The remote system terminated the association because it is shutting down. ] [2018-03-30 10:40:17,287] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Leader is removing unreachable node [akka.tcp://JobServer@127.0.0.1:38946] [2018-03-30 10:40:17,287] INFO Remoting [] [Remoting] - Address [akka.tcp://JobServer@127.0.0.1:38946] is now quarantined, all messages to this address will be delivered to dead letters. [2018-03-30 10:40:33,597] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting context with actor name jobManager-41-9af7-fdf98714171f [2018-03-30 10:40:33,598] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Created working directory /var/log/spark-job-server/jobserver-knimeSparkContext6905096089632358357 for context knimeSparkContext [2018-03-30 10:40:33,598] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting to execute sub process [./manager_start.sh, /var/log/spark-job-server/jobserver-knimeSparkContext6905096089632358357, akka.tcp://JobServer@127.0.0.1:33301] [2018-03-30 10:40:35,729] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Node [akka.tcp://JobServer@127.0.0.1:40614] is JOINING, roles [manager] [2018-03-30 10:40:36,286] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Leader is moving node [akka.tcp://JobServer@127.0.0.1:40614] to [Up] [2018-03-30 10:40:36,354] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Received identify response, attempting to initialize context at akka.tcp://JobServer@127.0.0.1:40614/user/* [2018-03-30 10:40:36,356] INFO k.jobserver.JobResultActor [] [akka://JobServer/user/context-supervisor/$i] - Starting actor spark.jobserver.JobResultActor [2018-03-30 10:41:08,379] INFO AkkaClusterSupervisorActor [] [] - SparkContext knimeSparkContext joined [2018-03-30 10:41:09,724] INFO jobserver.DataManagerActor [] [akka://JobServer/user/data-manager] - Storing data in file prefix mysql-connector-java-5.1.39-bin.jar, 989497 bytes [2018-03-30 11:32:29,210] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Actor terminated: jobManager-41-9af7-fdf98714171f [2018-03-30 11:32:29,211] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Marking node [akka.tcp://JobServer@127.0.0.1:40614] as [Down] [2018-03-30 11:32:29,242] ERROR akka.remote.EndpointWriter [] [akka://JobServer/system/endpointManager/reliableEndpointWriter-akka.tcp%3A%2F%2FJobServer%40127.0.0.1%3A40614-8/endpointWriter] - AssociationError [akka.tcp://JobServer@127.0.0.1:33301] <- [akka.tcp://JobServer@127.0.0.1:40614]: Error [Shut down address: akka.tcp://JobServer@127.0.0.1:40614] [ akka.remote.ShutDownAssociation: Shut down address: akka.tcp://JobServer@127.0.0.1:40614 Caused by: akka.remote.transport.Transport$InvalidAssociationException: The remote system terminated the association because it is shutting down. 
] [2018-03-30 11:32:29,276] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Leader is removing unreachable node [akka.tcp://JobServer@127.0.0.1:40614] [2018-03-30 11:32:29,277] INFO Remoting [] [Remoting] - Address [akka.tcp://JobServer@127.0.0.1:40614] is now quarantined, all messages to this address will be delivered to dead letters. [2018-03-30 11:50:44,724] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting context with actor name jobManager-d5-95de-06aae2e6f576 [2018-03-30 11:50:44,724] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Created working directory /var/log/spark-job-server/jobserver-knimeSparkContext6094358930592085239 for context knimeSparkContext [2018-03-30 11:50:44,724] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting to execute sub process [./manager_start.sh, /var/log/spark-job-server/jobserver-knimeSparkContext6094358930592085239, akka.tcp://JobServer@127.0.0.1:33301] [2018-03-30 11:50:47,041] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Node [akka.tcp://JobServer@127.0.0.1:36066] is JOINING, roles [manager] [2018-03-30 11:50:47,287] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Leader is moving node [akka.tcp://JobServer@127.0.0.1:36066] to [Up] [2018-03-30 11:50:47,333] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Received identify response, attempting to initialize context at akka.tcp://JobServer@127.0.0.1:36066/user/* [2018-03-30 11:50:47,333] INFO k.jobserver.JobResultActor [] [akka://JobServer/user/context-supervisor/$j] - Starting actor spark.jobserver.JobResultActor [2018-03-30 11:50:57,333] INFO AkkaClusterSupervisorActor [] [] - SparkContext knimeSparkContext joined [2018-03-30 11:50:57,367] INFO spark.jobserver.JarManager [] [akka://JobServer/user/jar-manager] - Storing jar for app knimeJobs_013620532e91f948_65dffd0366301f75b3c2dabe274a502a4f448c88_spark-1.6.cdh5_9, 398149 bytes [2018-03-30 11:51:30,119] INFO jobserver.DataManagerActor [] [akka://JobServer/user/data-manager] - Storing data in file prefix mysql-connector-java-5.1.39-bin.jar, 989497 bytes [2018-03-30 12:04:55,415] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Shutting down context knimeSparkContext [2018-03-30 12:04:55,416] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Marking node [akka.tcp://JobServer@127.0.0.1:36066] as [Down] [2018-03-30 12:04:55,591] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Actor terminated: jobManager-d5-95de-06aae2e6f576 [2018-03-30 12:04:55,600] ERROR akka.remote.EndpointWriter [] [akka://JobServer/system/endpointManager/reliableEndpointWriter-akka.tcp%3A%2F%2FJobServer%40127.0.0.1%3A36066-9/endpointWriter] - AssociationError [akka.tcp://JobServer@127.0.0.1:33301] <- [akka.tcp://JobServer@127.0.0.1:36066]: Error [Shut down address: akka.tcp://JobServer@127.0.0.1:36066] [ akka.remote.ShutDownAssociation: Shut down address: akka.tcp://JobServer@127.0.0.1:36066 Caused by: akka.remote.transport.Transport$InvalidAssociationException: The remote system terminated the association because it is shutting down. 
] [2018-03-30 12:04:56,286] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Leader is removing unreachable node [akka.tcp://JobServer@127.0.0.1:36066] [2018-03-30 12:04:56,286] INFO Remoting [] [Remoting] - Address [akka.tcp://JobServer@127.0.0.1:36066] is now quarantined, all messages to this address will be delivered to dead letters. [2018-03-30 12:04:57,970] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting context with actor name jobManager-cb-ac79-85baa502137f [2018-03-30 12:04:57,970] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Created working directory /var/log/spark-job-server/jobserver-knimeSparkContext8743420471359157082 for context knimeSparkContext [2018-03-30 12:04:57,971] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting to execute sub process [./manager_start.sh, /var/log/spark-job-server/jobserver-knimeSparkContext8743420471359157082, akka.tcp://JobServer@127.0.0.1:33301] [2018-03-30 12:05:00,279] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Node [akka.tcp://JobServer@127.0.0.1:44473] is JOINING, roles [manager] [2018-03-30 12:05:00,287] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Leader is moving node [akka.tcp://JobServer@127.0.0.1:44473] to [Up] [2018-03-30 12:05:00,475] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Received identify response, attempting to initialize context at akka.tcp://JobServer@127.0.0.1:44473/user/* [2018-03-30 12:05:00,475] INFO k.jobserver.JobResultActor [] [akka://JobServer/user/context-supervisor/$k] - Starting actor spark.jobserver.JobResultActor [2018-03-30 12:05:03,868] INFO AkkaClusterSupervisorActor [] [] - Failed to initialize context Actor[akka.tcp://JobServer@127.0.0.1:44473/user/jobManager-cb-ac79-85baa502137f#-78970754] java.lang.IllegalArgumentException: Required executor memory (6144+614 MB) is above the max threshold (5120 MB) of this cluster! Please check the values of 'yarn.scheduler.maximum-allocation-mb' and/or 'yarn.nodemanager.resource.memory-mb'. 
at org.apache.spark.deploy.yarn.Client.verifyClusterResources(Client.scala:281) at org.apache.spark.deploy.yarn.Client.submitApplication(Client.scala:140) at org.apache.spark.scheduler.cluster.YarnClientSchedulerBackend.start(YarnClientSchedulerBackend.scala:57) at org.apache.spark.scheduler.TaskSchedulerImpl.start(TaskSchedulerImpl.scala:151) at org.apache.spark.SparkContext.(SparkContext.scala:538) at spark.jobserver.context.DefaultSparkContextFactory$$anon$1.(SparkContextFactory.scala:53) at spark.jobserver.context.DefaultSparkContextFactory.makeContext(SparkContextFactory.scala:53) at spark.jobserver.context.DefaultSparkContextFactory.makeContext(SparkContextFactory.scala:48) at spark.jobserver.context.SparkContextFactory$class.makeContext(SparkContextFactory.scala:37) at spark.jobserver.context.DefaultSparkContextFactory.makeContext(SparkContextFactory.scala:48) at spark.jobserver.JobManagerActor.createContextFromConfig(JobManagerActor.scala:386) at spark.jobserver.JobManagerActor$$anonfun$wrappedReceive$1.applyOrElse(JobManagerActor.scala:129) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25) at ooyala.common.akka.ActorStack$$anonfun$receive$1.applyOrElse(ActorStack.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25) at ooyala.common.akka.Slf4jLogging$$anonfun$receive$1$$anonfun$applyOrElse$1.apply$mcV$sp(Slf4jLogging.scala:26) at ooyala.common.akka.Slf4jLogging$class.ooyala$common$akka$Slf4jLogging$$withAkkaSourceLogging(Slf4jLogging.scala:35) at ooyala.common.akka.Slf4jLogging$$anonfun$receive$1.applyOrElse(Slf4jLogging.scala:25) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25) at ooyala.common.akka.ActorMetrics$$anonfun$receive$1.applyOrElse(ActorMetrics.scala:24) at akka.actor.ActorCell.receiveMessage(ActorCell.scala:498) at akka.actor.ActorCell.invoke(ActorCell.scala:456) at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:237) at akka.dispatch.Mailbox.run(Mailbox.scala:219) at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:386) at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260) at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339) at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979) at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107) [2018-03-30 12:05:03,873] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Marking node [akka.tcp://JobServer@127.0.0.1:44473] as [Down] [2018-03-30 12:05:04,023] ERROR akka.remote.EndpointWriter [] [akka://JobServer/system/endpointManager/reliableEndpointWriter-akka.tcp%3A%2F%2FJobServer%40127.0.0.1%3A44473-10/endpointWriter] - AssociationError [akka.tcp://JobServer@127.0.0.1:33301] <- [akka.tcp://JobServer@127.0.0.1:44473]: Error [Shut down address: 
akka.tcp://JobServer@127.0.0.1:44473] [ akka.remote.ShutDownAssociation: Shut down address: akka.tcp://JobServer@127.0.0.1:44473 Caused by: akka.remote.transport.Transport$InvalidAssociationException: The remote system terminated the association because it is shutting down. ] [2018-03-30 12:05:04,286] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Leader is removing unreachable node [akka.tcp://JobServer@127.0.0.1:44473] [2018-03-30 12:05:04,286] INFO Remoting [] [Remoting] - Address [akka.tcp://JobServer@127.0.0.1:44473] is now quarantined, all messages to this address will be delivered to dead letters. [2018-03-30 12:05:23,499] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting context with actor name jobManager-c6-a556-8e7255867d43 [2018-03-30 12:05:23,500] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Created working directory /var/log/spark-job-server/jobserver-knimeSparkContext4794679970801346978 for context knimeSparkContext [2018-03-30 12:05:23,500] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting to execute sub process [./manager_start.sh, /var/log/spark-job-server/jobserver-knimeSparkContext4794679970801346978, akka.tcp://JobServer@127.0.0.1:33301] [2018-03-30 12:05:25,787] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Node [akka.tcp://JobServer@127.0.0.1:33535] is JOINING, roles [manager] [2018-03-30 12:05:26,287] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Leader is moving node [akka.tcp://JobServer@127.0.0.1:33535] to [Up] [2018-03-30 12:05:26,320] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Received identify response, attempting to initialize context at akka.tcp://JobServer@127.0.0.1:33535/user/* [2018-03-30 12:05:26,321] INFO k.jobserver.JobResultActor [] [akka://JobServer/user/context-supervisor/$l] - Starting actor spark.jobserver.JobResultActor [2018-03-30 12:05:29,933] INFO AkkaClusterSupervisorActor [] [] - Failed to initialize context Actor[akka.tcp://JobServer@127.0.0.1:33535/user/jobManager-c6-a556-8e7255867d43#80617242] java.lang.IllegalArgumentException: Required executor memory (5120+512 MB) is above the max threshold (5120 MB) of this cluster! Please check the values of 'yarn.scheduler.maximum-allocation-mb' and/or 'yarn.nodemanager.resource.memory-mb'. 
at org.apache.spark.deploy.yarn.Client.verifyClusterResources(Client.scala:281) at org.apache.spark.deploy.yarn.Client.submitApplication(Client.scala:140) at org.apache.spark.scheduler.cluster.YarnClientSchedulerBackend.start(YarnClientSchedulerBackend.scala:57) at org.apache.spark.scheduler.TaskSchedulerImpl.start(TaskSchedulerImpl.scala:151) at org.apache.spark.SparkContext.(SparkContext.scala:538) at spark.jobserver.context.DefaultSparkContextFactory$$anon$1.(SparkContextFactory.scala:53) at spark.jobserver.context.DefaultSparkContextFactory.makeContext(SparkContextFactory.scala:53) at spark.jobserver.context.DefaultSparkContextFactory.makeContext(SparkContextFactory.scala:48) at spark.jobserver.context.SparkContextFactory$class.makeContext(SparkContextFactory.scala:37) at spark.jobserver.context.DefaultSparkContextFactory.makeContext(SparkContextFactory.scala:48) at spark.jobserver.JobManagerActor.createContextFromConfig(JobManagerActor.scala:386) at spark.jobserver.JobManagerActor$$anonfun$wrappedReceive$1.applyOrElse(JobManagerActor.scala:129) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25) at ooyala.common.akka.ActorStack$$anonfun$receive$1.applyOrElse(ActorStack.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25) at ooyala.common.akka.Slf4jLogging$$anonfun$receive$1$$anonfun$applyOrElse$1.apply$mcV$sp(Slf4jLogging.scala:26) at ooyala.common.akka.Slf4jLogging$class.ooyala$common$akka$Slf4jLogging$$withAkkaSourceLogging(Slf4jLogging.scala:35) at ooyala.common.akka.Slf4jLogging$$anonfun$receive$1.applyOrElse(Slf4jLogging.scala:25) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25) at ooyala.common.akka.ActorMetrics$$anonfun$receive$1.applyOrElse(ActorMetrics.scala:24) at akka.actor.ActorCell.receiveMessage(ActorCell.scala:498) at akka.actor.ActorCell.invoke(ActorCell.scala:456) at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:237) at akka.dispatch.Mailbox.run(Mailbox.scala:219) at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:386) at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260) at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339) at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979) at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107) [2018-03-30 12:05:29,946] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Marking node [akka.tcp://JobServer@127.0.0.1:33535] as [Down] [2018-03-30 12:05:30,043] ERROR akka.remote.EndpointWriter [] [akka://JobServer/system/endpointManager/reliableEndpointWriter-akka.tcp%3A%2F%2FJobServer%40127.0.0.1%3A33535-11/endpointWriter] - AssociationError [akka.tcp://JobServer@127.0.0.1:33301] <- [akka.tcp://JobServer@127.0.0.1:33535]: Error [Shut down address: 
akka.tcp://JobServer@127.0.0.1:33535] [ akka.remote.ShutDownAssociation: Shut down address: akka.tcp://JobServer@127.0.0.1:33535 Caused by: akka.remote.transport.Transport$InvalidAssociationException: The remote system terminated the association because it is shutting down. ] [2018-03-30 12:05:30,286] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Leader is removing unreachable node [akka.tcp://JobServer@127.0.0.1:33535] [2018-03-30 12:05:30,287] INFO Remoting [] [Remoting] - Address [akka.tcp://JobServer@127.0.0.1:33535] is now quarantined, all messages to this address will be delivered to dead letters. [2018-03-30 12:05:34,459] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting context with actor name jobManager-62-b20f-5ed1f666c320 [2018-03-30 12:05:34,460] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Created working directory /var/log/spark-job-server/jobserver-knimeSparkContext8305090002196807995 for context knimeSparkContext [2018-03-30 12:05:34,460] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting to execute sub process [./manager_start.sh, /var/log/spark-job-server/jobserver-knimeSparkContext8305090002196807995, akka.tcp://JobServer@127.0.0.1:33301] [2018-03-30 12:05:36,643] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Node [akka.tcp://JobServer@127.0.0.1:36053] is JOINING, roles [manager] [2018-03-30 12:05:37,286] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Leader is moving node [akka.tcp://JobServer@127.0.0.1:36053] to [Up] [2018-03-30 12:05:37,327] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Received identify response, attempting to initialize context at akka.tcp://JobServer@127.0.0.1:36053/user/* [2018-03-30 12:05:37,327] INFO k.jobserver.JobResultActor [] [akka://JobServer/user/context-supervisor/$m] - Starting actor spark.jobserver.JobResultActor [2018-03-30 12:05:40,794] INFO AkkaClusterSupervisorActor [] [] - Failed to initialize context Actor[akka.tcp://JobServer@127.0.0.1:36053/user/jobManager-62-b20f-5ed1f666c320#1009554473] java.lang.IllegalArgumentException: Required executor memory (5120+512 MB) is above the max threshold (5120 MB) of this cluster! Please check the values of 'yarn.scheduler.maximum-allocation-mb' and/or 'yarn.nodemanager.resource.memory-mb'. 
at org.apache.spark.deploy.yarn.Client.verifyClusterResources(Client.scala:281) at org.apache.spark.deploy.yarn.Client.submitApplication(Client.scala:140) at org.apache.spark.scheduler.cluster.YarnClientSchedulerBackend.start(YarnClientSchedulerBackend.scala:57) at org.apache.spark.scheduler.TaskSchedulerImpl.start(TaskSchedulerImpl.scala:151) at org.apache.spark.SparkContext.(SparkContext.scala:538) at spark.jobserver.context.DefaultSparkContextFactory$$anon$1.(SparkContextFactory.scala:53) at spark.jobserver.context.DefaultSparkContextFactory.makeContext(SparkContextFactory.scala:53) at spark.jobserver.context.DefaultSparkContextFactory.makeContext(SparkContextFactory.scala:48) at spark.jobserver.context.SparkContextFactory$class.makeContext(SparkContextFactory.scala:37) at spark.jobserver.context.DefaultSparkContextFactory.makeContext(SparkContextFactory.scala:48) at spark.jobserver.JobManagerActor.createContextFromConfig(JobManagerActor.scala:386) at spark.jobserver.JobManagerActor$$anonfun$wrappedReceive$1.applyOrElse(JobManagerActor.scala:129) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25) at ooyala.common.akka.ActorStack$$anonfun$receive$1.applyOrElse(ActorStack.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25) at ooyala.common.akka.Slf4jLogging$$anonfun$receive$1$$anonfun$applyOrElse$1.apply$mcV$sp(Slf4jLogging.scala:26) at ooyala.common.akka.Slf4jLogging$class.ooyala$common$akka$Slf4jLogging$$withAkkaSourceLogging(Slf4jLogging.scala:35) at ooyala.common.akka.Slf4jLogging$$anonfun$receive$1.applyOrElse(Slf4jLogging.scala:25) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25) at ooyala.common.akka.ActorMetrics$$anonfun$receive$1.applyOrElse(ActorMetrics.scala:24) at akka.actor.ActorCell.receiveMessage(ActorCell.scala:498) at akka.actor.ActorCell.invoke(ActorCell.scala:456) at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:237) at akka.dispatch.Mailbox.run(Mailbox.scala:219) at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:386) at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260) at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339) at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979) at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107) [2018-03-30 12:05:40,800] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Marking node [akka.tcp://JobServer@127.0.0.1:36053] as [Down] [2018-03-30 12:05:40,987] ERROR akka.remote.EndpointWriter [] [akka://JobServer/system/endpointManager/reliableEndpointWriter-akka.tcp%3A%2F%2FJobServer%40127.0.0.1%3A36053-12/endpointWriter] - AssociationError [akka.tcp://JobServer@127.0.0.1:33301] <- [akka.tcp://JobServer@127.0.0.1:36053]: Error [Shut down address: 
akka.tcp://JobServer@127.0.0.1:36053] [ akka.remote.ShutDownAssociation: Shut down address: akka.tcp://JobServer@127.0.0.1:36053 Caused by: akka.remote.transport.Transport$InvalidAssociationException: The remote system terminated the association because it is shutting down. ] [2018-03-30 12:05:41,287] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Leader is removing unreachable node [akka.tcp://JobServer@127.0.0.1:36053] [2018-03-30 12:05:41,291] INFO Remoting [] [Remoting] - Address [akka.tcp://JobServer@127.0.0.1:36053] is now quarantined, all messages to this address will be delivered to dead letters. [2018-03-30 12:06:01,635] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting context with actor name jobManager-80-82f6-d0fec543bd79 [2018-03-30 12:06:01,635] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Created working directory /var/log/spark-job-server/jobserver-knimeSparkContext4741538749094108870 for context knimeSparkContext [2018-03-30 12:06:01,636] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting to execute sub process [./manager_start.sh, /var/log/spark-job-server/jobserver-knimeSparkContext4741538749094108870, akka.tcp://JobServer@127.0.0.1:33301] [2018-03-30 12:06:03,797] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Node [akka.tcp://JobServer@127.0.0.1:37974] is JOINING, roles [manager] [2018-03-30 12:06:04,286] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Leader is moving node [akka.tcp://JobServer@127.0.0.1:37974] to [Up] [2018-03-30 12:06:04,324] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Received identify response, attempting to initialize context at akka.tcp://JobServer@127.0.0.1:37974/user/* [2018-03-30 12:06:04,325] INFO k.jobserver.JobResultActor [] [akka://JobServer/user/context-supervisor/$n] - Starting actor spark.jobserver.JobResultActor [2018-03-30 12:06:07,889] INFO AkkaClusterSupervisorActor [] [] - Failed to initialize context Actor[akka.tcp://JobServer@127.0.0.1:37974/user/jobManager-80-82f6-d0fec543bd79#1232247346] java.lang.IllegalArgumentException: Required executor memory (5120+512 MB) is above the max threshold (5120 MB) of this cluster! Please check the values of 'yarn.scheduler.maximum-allocation-mb' and/or 'yarn.nodemanager.resource.memory-mb'. 
at org.apache.spark.deploy.yarn.Client.verifyClusterResources(Client.scala:281) at org.apache.spark.deploy.yarn.Client.submitApplication(Client.scala:140) at org.apache.spark.scheduler.cluster.YarnClientSchedulerBackend.start(YarnClientSchedulerBackend.scala:57) at org.apache.spark.scheduler.TaskSchedulerImpl.start(TaskSchedulerImpl.scala:151) at org.apache.spark.SparkContext.(SparkContext.scala:538) at spark.jobserver.context.DefaultSparkContextFactory$$anon$1.(SparkContextFactory.scala:53) at spark.jobserver.context.DefaultSparkContextFactory.makeContext(SparkContextFactory.scala:53) at spark.jobserver.context.DefaultSparkContextFactory.makeContext(SparkContextFactory.scala:48) at spark.jobserver.context.SparkContextFactory$class.makeContext(SparkContextFactory.scala:37) at spark.jobserver.context.DefaultSparkContextFactory.makeContext(SparkContextFactory.scala:48) at spark.jobserver.JobManagerActor.createContextFromConfig(JobManagerActor.scala:386) at spark.jobserver.JobManagerActor$$anonfun$wrappedReceive$1.applyOrElse(JobManagerActor.scala:129) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25) at ooyala.common.akka.ActorStack$$anonfun$receive$1.applyOrElse(ActorStack.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25) at ooyala.common.akka.Slf4jLogging$$anonfun$receive$1$$anonfun$applyOrElse$1.apply$mcV$sp(Slf4jLogging.scala:26) at ooyala.common.akka.Slf4jLogging$class.ooyala$common$akka$Slf4jLogging$$withAkkaSourceLogging(Slf4jLogging.scala:35) at ooyala.common.akka.Slf4jLogging$$anonfun$receive$1.applyOrElse(Slf4jLogging.scala:25) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25) at ooyala.common.akka.ActorMetrics$$anonfun$receive$1.applyOrElse(ActorMetrics.scala:24) at akka.actor.ActorCell.receiveMessage(ActorCell.scala:498) at akka.actor.ActorCell.invoke(ActorCell.scala:456) at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:237) at akka.dispatch.Mailbox.run(Mailbox.scala:219) at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:386) at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260) at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339) at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979) at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107) [2018-03-30 12:06:07,891] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Marking node [akka.tcp://JobServer@127.0.0.1:37974] as [Down] [2018-03-30 12:06:08,044] ERROR akka.remote.EndpointWriter [] [akka://JobServer/system/endpointManager/reliableEndpointWriter-akka.tcp%3A%2F%2FJobServer%40127.0.0.1%3A37974-13/endpointWriter] - AssociationError [akka.tcp://JobServer@127.0.0.1:33301] <- [akka.tcp://JobServer@127.0.0.1:37974]: Error [Shut down address: 
akka.tcp://JobServer@127.0.0.1:37974] [ akka.remote.ShutDownAssociation: Shut down address: akka.tcp://JobServer@127.0.0.1:37974 Caused by: akka.remote.transport.Transport$InvalidAssociationException: The remote system terminated the association because it is shutting down. ] [2018-03-30 12:06:08,287] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Leader is removing unreachable node [akka.tcp://JobServer@127.0.0.1:37974] [2018-03-30 12:06:08,287] INFO Remoting [] [Remoting] - Address [akka.tcp://JobServer@127.0.0.1:37974] is now quarantined, all messages to this address will be delivered to dead letters. [2018-03-30 12:06:20,880] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting context with actor name jobManager-34-a371-59ad16eba111 [2018-03-30 12:06:20,880] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Created working directory /var/log/spark-job-server/jobserver-knimeSparkContext6905336143760943574 for context knimeSparkContext [2018-03-30 12:06:20,881] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting to execute sub process [./manager_start.sh, /var/log/spark-job-server/jobserver-knimeSparkContext6905336143760943574, akka.tcp://JobServer@127.0.0.1:33301] [2018-03-30 12:06:23,113] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Node [akka.tcp://JobServer@127.0.0.1:36878] is JOINING, roles [manager] [2018-03-30 12:06:23,286] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Leader is moving node [akka.tcp://JobServer@127.0.0.1:36878] to [Up] [2018-03-30 12:06:23,322] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Received identify response, attempting to initialize context at akka.tcp://JobServer@127.0.0.1:36878/user/* [2018-03-30 12:06:23,323] INFO k.jobserver.JobResultActor [] [akka://JobServer/user/context-supervisor/$o] - Starting actor spark.jobserver.JobResultActor [2018-03-30 12:06:37,331] INFO AkkaClusterSupervisorActor [] [] - Failed to initialize context Actor[akka.tcp://JobServer@127.0.0.1:36878/user/jobManager-34-a371-59ad16eba111#-1420249186] org.apache.spark.SparkException: Yarn application has already ended! It might have been killed or unable to launch application master. 
at org.apache.spark.scheduler.cluster.YarnClientSchedulerBackend.waitForApplication(YarnClientSchedulerBackend.scala:124) at org.apache.spark.scheduler.cluster.YarnClientSchedulerBackend.start(YarnClientSchedulerBackend.scala:64) at org.apache.spark.scheduler.TaskSchedulerImpl.start(TaskSchedulerImpl.scala:151) at org.apache.spark.SparkContext.(SparkContext.scala:538) at spark.jobserver.context.DefaultSparkContextFactory$$anon$1.(SparkContextFactory.scala:53) at spark.jobserver.context.DefaultSparkContextFactory.makeContext(SparkContextFactory.scala:53) at spark.jobserver.context.DefaultSparkContextFactory.makeContext(SparkContextFactory.scala:48) at spark.jobserver.context.SparkContextFactory$class.makeContext(SparkContextFactory.scala:37) at spark.jobserver.context.DefaultSparkContextFactory.makeContext(SparkContextFactory.scala:48) at spark.jobserver.JobManagerActor.createContextFromConfig(JobManagerActor.scala:386) at spark.jobserver.JobManagerActor$$anonfun$wrappedReceive$1.applyOrElse(JobManagerActor.scala:129) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25) at ooyala.common.akka.ActorStack$$anonfun$receive$1.applyOrElse(ActorStack.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25) at ooyala.common.akka.Slf4jLogging$$anonfun$receive$1$$anonfun$applyOrElse$1.apply$mcV$sp(Slf4jLogging.scala:26) at ooyala.common.akka.Slf4jLogging$class.ooyala$common$akka$Slf4jLogging$$withAkkaSourceLogging(Slf4jLogging.scala:35) at ooyala.common.akka.Slf4jLogging$$anonfun$receive$1.applyOrElse(Slf4jLogging.scala:25) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25) at ooyala.common.akka.ActorMetrics$$anonfun$receive$1.applyOrElse(ActorMetrics.scala:24) at akka.actor.ActorCell.receiveMessage(ActorCell.scala:498) at akka.actor.ActorCell.invoke(ActorCell.scala:456) at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:237) at akka.dispatch.Mailbox.run(Mailbox.scala:219) at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:386) at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260) at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339) at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979) at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107) [2018-03-30 12:06:37,332] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Marking node [akka.tcp://JobServer@127.0.0.1:36878] as [Down] [2018-03-30 12:06:37,388] ERROR akka.remote.EndpointWriter [] [akka://JobServer/system/endpointManager/reliableEndpointWriter-akka.tcp%3A%2F%2FJobServer%40127.0.0.1%3A36878-14/endpointWriter] - AssociationError [akka.tcp://JobServer@127.0.0.1:33301] <- [akka.tcp://JobServer@127.0.0.1:36878]: Error [Shut down address: akka.tcp://JobServer@127.0.0.1:36878] [ 
akka.remote.ShutDownAssociation: Shut down address: akka.tcp://JobServer@127.0.0.1:36878 Caused by: akka.remote.transport.Transport$InvalidAssociationException: The remote system terminated the association because it is shutting down. ] [2018-03-30 12:06:38,286] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Leader is removing unreachable node [akka.tcp://JobServer@127.0.0.1:36878] [2018-03-30 12:06:38,286] INFO Remoting [] [Remoting] - Address [akka.tcp://JobServer@127.0.0.1:36878] is now quarantined, all messages to this address will be delivered to dead letters. [2018-03-30 12:07:00,751] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting context with actor name jobManager-cb-a765-0f4cf94b8371 [2018-03-30 12:07:00,752] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Created working directory /var/log/spark-job-server/jobserver-knimeSparkContext976411940600681471 for context knimeSparkContext [2018-03-30 12:07:00,752] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting to execute sub process [./manager_start.sh, /var/log/spark-job-server/jobserver-knimeSparkContext976411940600681471, akka.tcp://JobServer@127.0.0.1:33301] [2018-03-30 12:07:02,848] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Node [akka.tcp://JobServer@127.0.0.1:35889] is JOINING, roles [manager] [2018-03-30 12:07:03,286] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Leader is moving node [akka.tcp://JobServer@127.0.0.1:35889] to [Up] [2018-03-30 12:07:03,371] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Received identify response, attempting to initialize context at akka.tcp://JobServer@127.0.0.1:35889/user/* [2018-03-30 12:07:03,371] INFO k.jobserver.JobResultActor [] [akka://JobServer/user/context-supervisor/$p] - Starting actor spark.jobserver.JobResultActor [2018-03-30 12:07:17,538] INFO AkkaClusterSupervisorActor [] [] - Failed to initialize context Actor[akka.tcp://JobServer@127.0.0.1:35889/user/jobManager-cb-a765-0f4cf94b8371#-1419448504] org.apache.spark.SparkException: Yarn application has already ended! It might have been killed or unable to launch application master. 
at org.apache.spark.scheduler.cluster.YarnClientSchedulerBackend.waitForApplication(YarnClientSchedulerBackend.scala:124) at org.apache.spark.scheduler.cluster.YarnClientSchedulerBackend.start(YarnClientSchedulerBackend.scala:64) at org.apache.spark.scheduler.TaskSchedulerImpl.start(TaskSchedulerImpl.scala:151) at org.apache.spark.SparkContext.(SparkContext.scala:538) at spark.jobserver.context.DefaultSparkContextFactory$$anon$1.(SparkContextFactory.scala:53) at spark.jobserver.context.DefaultSparkContextFactory.makeContext(SparkContextFactory.scala:53) at spark.jobserver.context.DefaultSparkContextFactory.makeContext(SparkContextFactory.scala:48) at spark.jobserver.context.SparkContextFactory$class.makeContext(SparkContextFactory.scala:37) at spark.jobserver.context.DefaultSparkContextFactory.makeContext(SparkContextFactory.scala:48) at spark.jobserver.JobManagerActor.createContextFromConfig(JobManagerActor.scala:386) at spark.jobserver.JobManagerActor$$anonfun$wrappedReceive$1.applyOrElse(JobManagerActor.scala:129) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25) at ooyala.common.akka.ActorStack$$anonfun$receive$1.applyOrElse(ActorStack.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25) at ooyala.common.akka.Slf4jLogging$$anonfun$receive$1$$anonfun$applyOrElse$1.apply$mcV$sp(Slf4jLogging.scala:26) at ooyala.common.akka.Slf4jLogging$class.ooyala$common$akka$Slf4jLogging$$withAkkaSourceLogging(Slf4jLogging.scala:35) at ooyala.common.akka.Slf4jLogging$$anonfun$receive$1.applyOrElse(Slf4jLogging.scala:25) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25) at ooyala.common.akka.ActorMetrics$$anonfun$receive$1.applyOrElse(ActorMetrics.scala:24) at akka.actor.ActorCell.receiveMessage(ActorCell.scala:498) at akka.actor.ActorCell.invoke(ActorCell.scala:456) at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:237) at akka.dispatch.Mailbox.run(Mailbox.scala:219) at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:386) at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260) at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339) at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979) at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107) [2018-03-30 12:07:17,543] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Marking node [akka.tcp://JobServer@127.0.0.1:35889] as [Down] [2018-03-30 12:07:17,567] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Ignoring received gossip status from unreachable [UniqueAddress(akka.tcp://JobServer@127.0.0.1:35889,-1577140618)] [2018-03-30 12:07:17,619] ERROR akka.remote.EndpointWriter [] 
[akka://JobServer/system/endpointManager/reliableEndpointWriter-akka.tcp%3A%2F%2FJobServer%40127.0.0.1%3A35889-15/endpointWriter] - AssociationError [akka.tcp://JobServer@127.0.0.1:33301] <- [akka.tcp://JobServer@127.0.0.1:35889]: Error [Shut down address: akka.tcp://JobServer@127.0.0.1:35889] [ akka.remote.ShutDownAssociation: Shut down address: akka.tcp://JobServer@127.0.0.1:35889 Caused by: akka.remote.transport.Transport$InvalidAssociationException: The remote system terminated the association because it is shutting down. ] [2018-03-30 12:07:18,276] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Leader is removing unreachable node [akka.tcp://JobServer@127.0.0.1:35889] [2018-03-30 12:07:18,276] INFO Remoting [] [Remoting] - Address [akka.tcp://JobServer@127.0.0.1:35889] is now quarantined, all messages to this address will be delivered to dead letters. [2018-03-30 12:22:02,491] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting context with actor name jobManager-6d-b7f3-85a4e6831401 [2018-03-30 12:22:02,492] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Created working directory /var/log/spark-job-server/jobserver-knimeSparkContext7224077213033335796 for context knimeSparkContext [2018-03-30 12:22:02,492] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting to execute sub process [./manager_start.sh, /var/log/spark-job-server/jobserver-knimeSparkContext7224077213033335796, akka.tcp://JobServer@127.0.0.1:33301] [2018-03-30 12:22:04,655] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Node [akka.tcp://JobServer@127.0.0.1:37141] is JOINING, roles [manager] [2018-03-30 12:22:05,287] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Leader is moving node [akka.tcp://JobServer@127.0.0.1:37141] to [Up] [2018-03-30 12:22:05,340] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Received identify response, attempting to initialize context at akka.tcp://JobServer@127.0.0.1:37141/user/* [2018-03-30 12:22:05,341] INFO k.jobserver.JobResultActor [] [akka://JobServer/user/context-supervisor/$q] - Starting actor spark.jobserver.JobResultActor [2018-03-30 12:22:17,787] INFO AkkaClusterSupervisorActor [] [] - SparkContext knimeSparkContext joined [2018-03-30 12:22:41,951] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Shutting down context knimeSparkContext [2018-03-30 12:22:41,952] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Marking node [akka.tcp://JobServer@127.0.0.1:37141] as [Down] [2018-03-30 12:22:42,151] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Actor terminated: jobManager-6d-b7f3-85a4e6831401 [2018-03-30 12:22:42,250] ERROR akka.remote.EndpointWriter [] [akka://JobServer/system/endpointManager/reliableEndpointWriter-akka.tcp%3A%2F%2FJobServer%40127.0.0.1%3A37141-16/endpointWriter] - AssociationError [akka.tcp://JobServer@127.0.0.1:33301] <- [akka.tcp://JobServer@127.0.0.1:37141]: Error [Shut down address: akka.tcp://JobServer@127.0.0.1:37141] [ akka.remote.ShutDownAssociation: Shut down address: akka.tcp://JobServer@127.0.0.1:37141 Caused by: akka.remote.transport.Transport$InvalidAssociationException: The 
remote system terminated the association because it is shutting down. ] [2018-03-30 12:22:42,286] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Leader is removing unreachable node [akka.tcp://JobServer@127.0.0.1:37141] [2018-03-30 12:22:42,287] INFO Remoting [] [Remoting] - Address [akka.tcp://JobServer@127.0.0.1:37141] is now quarantined, all messages to this address will be delivered to dead letters. [2018-03-30 12:22:44,728] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting context with actor name jobManager-f6-a392-7c6fe3c8c599 [2018-03-30 12:22:44,728] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Created working directory /var/log/spark-job-server/jobserver-knimeSparkContext5492974594045046964 for context knimeSparkContext [2018-03-30 12:22:44,729] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting to execute sub process [./manager_start.sh, /var/log/spark-job-server/jobserver-knimeSparkContext5492974594045046964, akka.tcp://JobServer@127.0.0.1:33301] [2018-03-30 12:22:46,855] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Node [akka.tcp://JobServer@127.0.0.1:39166] is JOINING, roles [manager] [2018-03-30 12:22:47,286] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Leader is moving node [akka.tcp://JobServer@127.0.0.1:39166] to [Up] [2018-03-30 12:22:47,347] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Received identify response, attempting to initialize context at akka.tcp://JobServer@127.0.0.1:39166/user/* [2018-03-30 12:22:47,348] INFO k.jobserver.JobResultActor [] [akka://JobServer/user/context-supervisor/$r] - Starting actor spark.jobserver.JobResultActor [2018-03-30 12:23:19,500] INFO AkkaClusterSupervisorActor [] [] - SparkContext knimeSparkContext joined [2018-03-30 12:23:30,887] INFO jobserver.DataManagerActor [] [akka://JobServer/user/data-manager] - Storing data in file prefix mysql-connector-java-5.1.39-bin.jar, 989497 bytes [2018-03-30 12:25:37,422] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Shutting down context knimeSparkContext [2018-03-30 12:25:37,422] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Marking node [akka.tcp://JobServer@127.0.0.1:39166] as [Down] [2018-03-30 12:25:37,566] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Ignoring received gossip status from unreachable [UniqueAddress(akka.tcp://JobServer@127.0.0.1:39166,-828037535)] [2018-03-30 12:25:37,615] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Actor terminated: jobManager-f6-a392-7c6fe3c8c599 [2018-03-30 12:25:37,625] ERROR akka.remote.EndpointWriter [] [akka://JobServer/system/endpointManager/reliableEndpointWriter-akka.tcp%3A%2F%2FJobServer%40127.0.0.1%3A39166-17/endpointWriter] - AssociationError [akka.tcp://JobServer@127.0.0.1:33301] <- [akka.tcp://JobServer@127.0.0.1:39166]: Error [Shut down address: akka.tcp://JobServer@127.0.0.1:39166] [ akka.remote.ShutDownAssociation: Shut down address: akka.tcp://JobServer@127.0.0.1:39166 Caused by: akka.remote.transport.Transport$InvalidAssociationException: The remote system terminated the 
association because it is shutting down. ] [2018-03-30 12:25:38,286] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Leader is removing unreachable node [akka.tcp://JobServer@127.0.0.1:39166] [2018-03-30 12:25:38,287] INFO Remoting [] [Remoting] - Address [akka.tcp://JobServer@127.0.0.1:39166] is now quarantined, all messages to this address will be delivered to dead letters. [2018-03-30 12:44:18,891] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting context with actor name jobManager-af-b181-0c4fa2832fc7 [2018-03-30 12:44:18,891] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Created working directory /var/log/spark-job-server/jobserver-knimeSparkContext2662124754317138453 for context knimeSparkContext [2018-03-30 12:44:18,892] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting to execute sub process [./manager_start.sh, /var/log/spark-job-server/jobserver-knimeSparkContext2662124754317138453, akka.tcp://JobServer@127.0.0.1:33301] [2018-03-30 12:44:21,067] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Node [akka.tcp://JobServer@127.0.0.1:35197] is JOINING, roles [manager] [2018-03-30 12:44:21,287] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Leader is moving node [akka.tcp://JobServer@127.0.0.1:35197] to [Up] [2018-03-30 12:44:21,329] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Received identify response, attempting to initialize context at akka.tcp://JobServer@127.0.0.1:35197/user/* [2018-03-30 12:44:21,329] INFO k.jobserver.JobResultActor [] [akka://JobServer/user/context-supervisor/$s] - Starting actor spark.jobserver.JobResultActor [2018-03-30 12:44:24,840] INFO AkkaClusterSupervisorActor [] [] - Failed to initialize context Actor[akka.tcp://JobServer@127.0.0.1:35197/user/jobManager-af-b181-0c4fa2832fc7#-1967042047] java.lang.IllegalArgumentException: Required executor memory (5120+512 MB) is above the max threshold (5120 MB) of this cluster! Please check the values of 'yarn.scheduler.maximum-allocation-mb' and/or 'yarn.nodemanager.resource.memory-mb'. 
at org.apache.spark.deploy.yarn.Client.verifyClusterResources(Client.scala:281) at org.apache.spark.deploy.yarn.Client.submitApplication(Client.scala:140) at org.apache.spark.scheduler.cluster.YarnClientSchedulerBackend.start(YarnClientSchedulerBackend.scala:57) at org.apache.spark.scheduler.TaskSchedulerImpl.start(TaskSchedulerImpl.scala:151) at org.apache.spark.SparkContext.(SparkContext.scala:538) at spark.jobserver.context.DefaultSparkContextFactory$$anon$1.(SparkContextFactory.scala:53) at spark.jobserver.context.DefaultSparkContextFactory.makeContext(SparkContextFactory.scala:53) at spark.jobserver.context.DefaultSparkContextFactory.makeContext(SparkContextFactory.scala:48) at spark.jobserver.context.SparkContextFactory$class.makeContext(SparkContextFactory.scala:37) at spark.jobserver.context.DefaultSparkContextFactory.makeContext(SparkContextFactory.scala:48) at spark.jobserver.JobManagerActor.createContextFromConfig(JobManagerActor.scala:386) at spark.jobserver.JobManagerActor$$anonfun$wrappedReceive$1.applyOrElse(JobManagerActor.scala:129) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25) at ooyala.common.akka.ActorStack$$anonfun$receive$1.applyOrElse(ActorStack.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25) at ooyala.common.akka.Slf4jLogging$$anonfun$receive$1$$anonfun$applyOrElse$1.apply$mcV$sp(Slf4jLogging.scala:26) at ooyala.common.akka.Slf4jLogging$class.ooyala$common$akka$Slf4jLogging$$withAkkaSourceLogging(Slf4jLogging.scala:35) at ooyala.common.akka.Slf4jLogging$$anonfun$receive$1.applyOrElse(Slf4jLogging.scala:25) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25) at ooyala.common.akka.ActorMetrics$$anonfun$receive$1.applyOrElse(ActorMetrics.scala:24) at akka.actor.ActorCell.receiveMessage(ActorCell.scala:498) at akka.actor.ActorCell.invoke(ActorCell.scala:456) at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:237) at akka.dispatch.Mailbox.run(Mailbox.scala:219) at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:386) at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260) at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339) at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979) at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107) [2018-03-30 12:44:24,841] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Marking node [akka.tcp://JobServer@127.0.0.1:35197] as [Down] [2018-03-30 12:44:24,975] ERROR akka.remote.EndpointWriter [] [akka://JobServer/system/endpointManager/reliableEndpointWriter-akka.tcp%3A%2F%2FJobServer%40127.0.0.1%3A35197-18/endpointWriter] - AssociationError [akka.tcp://JobServer@127.0.0.1:33301] <- [akka.tcp://JobServer@127.0.0.1:35197]: Error [Shut down address: 
akka.tcp://JobServer@127.0.0.1:35197] [ akka.remote.ShutDownAssociation: Shut down address: akka.tcp://JobServer@127.0.0.1:35197 Caused by: akka.remote.transport.Transport$InvalidAssociationException: The remote system terminated the association because it is shutting down. ] [2018-03-30 12:44:25,286] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Leader is removing unreachable node [akka.tcp://JobServer@127.0.0.1:35197] [2018-03-30 12:44:25,286] INFO Remoting [] [Remoting] - Address [akka.tcp://JobServer@127.0.0.1:35197] is now quarantined, all messages to this address will be delivered to dead letters. [2018-03-30 12:45:36,518] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting context with actor name jobManager-57-a347-f5ab67efdf71 [2018-03-30 12:45:36,518] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Created working directory /var/log/spark-job-server/jobserver-knimeSparkContext8818115642182174893 for context knimeSparkContext [2018-03-30 12:45:36,518] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting to execute sub process [./manager_start.sh, /var/log/spark-job-server/jobserver-knimeSparkContext8818115642182174893, akka.tcp://JobServer@127.0.0.1:33301] [2018-03-30 12:45:38,672] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Node [akka.tcp://JobServer@127.0.0.1:45157] is JOINING, roles [manager] [2018-03-30 12:45:39,286] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Leader is moving node [akka.tcp://JobServer@127.0.0.1:45157] to [Up] [2018-03-30 12:45:39,336] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Received identify response, attempting to initialize context at akka.tcp://JobServer@127.0.0.1:45157/user/* [2018-03-30 12:45:39,337] INFO k.jobserver.JobResultActor [] [akka://JobServer/user/context-supervisor/$t] - Starting actor spark.jobserver.JobResultActor [2018-03-30 12:46:11,518] INFO AkkaClusterSupervisorActor [] [] - SparkContext knimeSparkContext joined [2018-03-30 12:46:52,073] INFO jobserver.DataManagerActor [] [akka://JobServer/user/data-manager] - Storing data in file prefix mysql-connector-java-5.1.39-bin.jar, 989497 bytes [2018-03-30 12:56:38,708] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Shutting down context knimeSparkContext [2018-03-30 12:56:38,716] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Marking node [akka.tcp://JobServer@127.0.0.1:45157] as [Down] [2018-03-30 12:56:38,862] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Actor terminated: jobManager-57-a347-f5ab67efdf71 [2018-03-30 12:56:38,883] ERROR akka.remote.EndpointWriter [] [akka://JobServer/system/endpointManager/reliableEndpointWriter-akka.tcp%3A%2F%2FJobServer%40127.0.0.1%3A45157-19/endpointWriter] - AssociationError [akka.tcp://JobServer@127.0.0.1:33301] <- [akka.tcp://JobServer@127.0.0.1:45157]: Error [Shut down address: akka.tcp://JobServer@127.0.0.1:45157] [ akka.remote.ShutDownAssociation: Shut down address: akka.tcp://JobServer@127.0.0.1:45157 Caused by: akka.remote.transport.Transport$InvalidAssociationException: The remote system terminated the association because it is shutting down. 
] [2018-03-30 12:56:39,287] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Leader is removing unreachable node [akka.tcp://JobServer@127.0.0.1:45157] [2018-03-30 12:56:39,287] INFO Remoting [] [Remoting] - Address [akka.tcp://JobServer@127.0.0.1:45157] is now quarantined, all messages to this address will be delivered to dead letters. [2018-03-30 12:58:26,154] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting context with actor name jobManager-0e-86d7-f01e3fbd4546 [2018-03-30 12:58:26,155] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Created working directory /var/log/spark-job-server/jobserver-knimeSparkContext1077376043928697805 for context knimeSparkContext [2018-03-30 12:58:26,155] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting to execute sub process [./manager_start.sh, /var/log/spark-job-server/jobserver-knimeSparkContext1077376043928697805, akka.tcp://JobServer@127.0.0.1:33301] [2018-03-30 12:58:28,292] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Node [akka.tcp://JobServer@127.0.0.1:44218] is JOINING, roles [manager] [2018-03-30 12:58:29,286] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Leader is moving node [akka.tcp://JobServer@127.0.0.1:44218] to [Up] [2018-03-30 12:58:29,348] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Received identify response, attempting to initialize context at akka.tcp://JobServer@127.0.0.1:44218/user/* [2018-03-30 12:58:29,348] INFO k.jobserver.JobResultActor [] [akka://JobServer/user/context-supervisor/$u] - Starting actor spark.jobserver.JobResultActor [2018-03-30 12:58:38,596] INFO AkkaClusterSupervisorActor [] [] - SparkContext knimeSparkContext joined [2018-03-30 12:58:44,627] INFO jobserver.DataManagerActor [] [akka://JobServer/user/data-manager] - Storing data in file prefix mysql-connector-java-5.1.39-bin.jar, 989497 bytes [2018-03-30 13:07:49,999] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Shutting down context knimeSparkContext [2018-03-30 13:07:49,999] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Marking node [akka.tcp://JobServer@127.0.0.1:44218] as [Down] [2018-03-30 13:07:50,159] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Actor terminated: jobManager-0e-86d7-f01e3fbd4546 [2018-03-30 13:07:50,160] ERROR akka.remote.EndpointWriter [] [akka://JobServer/system/endpointManager/reliableEndpointWriter-akka.tcp%3A%2F%2FJobServer%40127.0.0.1%3A44218-20/endpointWriter] - AssociationError [akka.tcp://JobServer@127.0.0.1:33301] <- [akka.tcp://JobServer@127.0.0.1:44218]: Error [Shut down address: akka.tcp://JobServer@127.0.0.1:44218] [ akka.remote.ShutDownAssociation: Shut down address: akka.tcp://JobServer@127.0.0.1:44218 Caused by: akka.remote.transport.Transport$InvalidAssociationException: The remote system terminated the association because it is shutting down. 
] [2018-03-30 13:07:50,286] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Leader is removing unreachable node [akka.tcp://JobServer@127.0.0.1:44218] [2018-03-30 13:07:50,286] INFO Remoting [] [Remoting] - Address [akka.tcp://JobServer@127.0.0.1:44218] is now quarantined, all messages to this address will be delivered to dead letters. [2018-03-30 13:07:53,861] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting context with actor name jobManager-6e-ba6e-5fb54f6f7dd9 [2018-03-30 13:07:53,862] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Created working directory /var/log/spark-job-server/jobserver-knimeSparkContext7101317270396554646 for context knimeSparkContext [2018-03-30 13:07:53,862] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting to execute sub process [./manager_start.sh, /var/log/spark-job-server/jobserver-knimeSparkContext7101317270396554646, akka.tcp://JobServer@127.0.0.1:33301] [2018-03-30 13:07:56,128] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Node [akka.tcp://JobServer@127.0.0.1:42508] is JOINING, roles [manager] [2018-03-30 13:07:56,286] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Leader is moving node [akka.tcp://JobServer@127.0.0.1:42508] to [Up] [2018-03-30 13:07:56,353] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Received identify response, attempting to initialize context at akka.tcp://JobServer@127.0.0.1:42508/user/* [2018-03-30 13:07:56,354] INFO k.jobserver.JobResultActor [] [akka://JobServer/user/context-supervisor/$v] - Starting actor spark.jobserver.JobResultActor [2018-03-30 13:08:09,976] INFO AkkaClusterSupervisorActor [] [] - Failed to initialize context Actor[akka.tcp://JobServer@127.0.0.1:42508/user/jobManager-6e-ba6e-5fb54f6f7dd9#1510013449] org.apache.spark.SparkException: Yarn application has already ended! It might have been killed or unable to launch application master. 
at org.apache.spark.scheduler.cluster.YarnClientSchedulerBackend.waitForApplication(YarnClientSchedulerBackend.scala:124) at org.apache.spark.scheduler.cluster.YarnClientSchedulerBackend.start(YarnClientSchedulerBackend.scala:64) at org.apache.spark.scheduler.TaskSchedulerImpl.start(TaskSchedulerImpl.scala:151) at org.apache.spark.SparkContext.(SparkContext.scala:538) at spark.jobserver.context.DefaultSparkContextFactory$$anon$1.(SparkContextFactory.scala:53) at spark.jobserver.context.DefaultSparkContextFactory.makeContext(SparkContextFactory.scala:53) at spark.jobserver.context.DefaultSparkContextFactory.makeContext(SparkContextFactory.scala:48) at spark.jobserver.context.SparkContextFactory$class.makeContext(SparkContextFactory.scala:37) at spark.jobserver.context.DefaultSparkContextFactory.makeContext(SparkContextFactory.scala:48) at spark.jobserver.JobManagerActor.createContextFromConfig(JobManagerActor.scala:386) at spark.jobserver.JobManagerActor$$anonfun$wrappedReceive$1.applyOrElse(JobManagerActor.scala:129) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25) at ooyala.common.akka.ActorStack$$anonfun$receive$1.applyOrElse(ActorStack.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25) at ooyala.common.akka.Slf4jLogging$$anonfun$receive$1$$anonfun$applyOrElse$1.apply$mcV$sp(Slf4jLogging.scala:26) at ooyala.common.akka.Slf4jLogging$class.ooyala$common$akka$Slf4jLogging$$withAkkaSourceLogging(Slf4jLogging.scala:35) at ooyala.common.akka.Slf4jLogging$$anonfun$receive$1.applyOrElse(Slf4jLogging.scala:25) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25) at ooyala.common.akka.ActorMetrics$$anonfun$receive$1.applyOrElse(ActorMetrics.scala:24) at akka.actor.ActorCell.receiveMessage(ActorCell.scala:498) at akka.actor.ActorCell.invoke(ActorCell.scala:456) at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:237) at akka.dispatch.Mailbox.run(Mailbox.scala:219) at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:386) at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260) at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339) at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979) at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107) [2018-03-30 13:08:09,978] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Marking node [akka.tcp://JobServer@127.0.0.1:42508] as [Down] [2018-03-30 13:08:10,045] ERROR akka.remote.EndpointWriter [] [akka://JobServer/system/endpointManager/reliableEndpointWriter-akka.tcp%3A%2F%2FJobServer%40127.0.0.1%3A42508-21/endpointWriter] - AssociationError [akka.tcp://JobServer@127.0.0.1:33301] <- [akka.tcp://JobServer@127.0.0.1:42508]: Error [Shut down address: akka.tcp://JobServer@127.0.0.1:42508] [ 
akka.remote.ShutDownAssociation: Shut down address: akka.tcp://JobServer@127.0.0.1:42508 Caused by: akka.remote.transport.Transport$InvalidAssociationException: The remote system terminated the association because it is shutting down. ] [2018-03-30 13:08:10,286] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Leader is removing unreachable node [akka.tcp://JobServer@127.0.0.1:42508] [2018-03-30 13:08:10,286] INFO Remoting [] [Remoting] - Address [akka.tcp://JobServer@127.0.0.1:42508] is now quarantined, all messages to this address will be delivered to dead letters. [2018-03-30 13:08:36,466] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting context with actor name jobManager-35-87e5-76bc1f7de4d7 [2018-03-30 13:08:36,467] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Created working directory /var/log/spark-job-server/jobserver-knimeSparkContext3523362651053815454 for context knimeSparkContext [2018-03-30 13:08:36,467] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting to execute sub process [./manager_start.sh, /var/log/spark-job-server/jobserver-knimeSparkContext3523362651053815454, akka.tcp://JobServer@127.0.0.1:33301] [2018-03-30 13:08:38,560] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Node [akka.tcp://JobServer@127.0.0.1:46575] is JOINING, roles [manager] [2018-03-30 13:08:39,276] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Leader is moving node [akka.tcp://JobServer@127.0.0.1:46575] to [Up] [2018-03-30 13:08:39,325] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Received identify response, attempting to initialize context at akka.tcp://JobServer@127.0.0.1:46575/user/* [2018-03-30 13:08:39,326] INFO k.jobserver.JobResultActor [] [akka://JobServer/user/context-supervisor/$w] - Starting actor spark.jobserver.JobResultActor [2018-03-30 13:09:11,534] INFO AkkaClusterSupervisorActor [] [] - Failed to initialize context Actor[akka.tcp://JobServer@127.0.0.1:46575/user/jobManager-35-87e5-76bc1f7de4d7#307583849] java.lang.NullPointerException at org.apache.spark.SparkContext.<init>(SparkContext.scala:592) at spark.jobserver.context.DefaultSparkContextFactory$$anon$1.<init>(SparkContextFactory.scala:53) at spark.jobserver.context.DefaultSparkContextFactory.makeContext(SparkContextFactory.scala:53) at spark.jobserver.context.DefaultSparkContextFactory.makeContext(SparkContextFactory.scala:48) at spark.jobserver.context.SparkContextFactory$class.makeContext(SparkContextFactory.scala:37) at spark.jobserver.context.DefaultSparkContextFactory.makeContext(SparkContextFactory.scala:48) at spark.jobserver.JobManagerActor.createContextFromConfig(JobManagerActor.scala:386) at spark.jobserver.JobManagerActor$$anonfun$wrappedReceive$1.applyOrElse(JobManagerActor.scala:129) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25) at ooyala.common.akka.ActorStack$$anonfun$receive$1.applyOrElse(ActorStack.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33) at
scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25) at ooyala.common.akka.Slf4jLogging$$anonfun$receive$1$$anonfun$applyOrElse$1.apply$mcV$sp(Slf4jLogging.scala:26) at ooyala.common.akka.Slf4jLogging$class.ooyala$common$akka$Slf4jLogging$$withAkkaSourceLogging(Slf4jLogging.scala:35) at ooyala.common.akka.Slf4jLogging$$anonfun$receive$1.applyOrElse(Slf4jLogging.scala:25) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25) at ooyala.common.akka.ActorMetrics$$anonfun$receive$1.applyOrElse(ActorMetrics.scala:24) at akka.actor.ActorCell.receiveMessage(ActorCell.scala:498) at akka.actor.ActorCell.invoke(ActorCell.scala:456) at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:237) at akka.dispatch.Mailbox.run(Mailbox.scala:219) at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:386) at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260) at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339) at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979) at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107) [2018-03-30 13:09:11,535] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Marking node [akka.tcp://JobServer@127.0.0.1:46575] as [Down] [2018-03-30 13:09:11,578] ERROR akka.remote.EndpointWriter [] [akka://JobServer/system/endpointManager/reliableEndpointWriter-akka.tcp%3A%2F%2FJobServer%40127.0.0.1%3A46575-22/endpointWriter] - AssociationError [akka.tcp://JobServer@127.0.0.1:33301] <- [akka.tcp://JobServer@127.0.0.1:46575]: Error [Shut down address: akka.tcp://JobServer@127.0.0.1:46575] [ akka.remote.ShutDownAssociation: Shut down address: akka.tcp://JobServer@127.0.0.1:46575 Caused by: akka.remote.transport.Transport$InvalidAssociationException: The remote system terminated the association because it is shutting down. ] [2018-03-30 13:09:12,277] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Leader is removing unreachable node [akka.tcp://JobServer@127.0.0.1:46575] [2018-03-30 13:09:12,277] INFO Remoting [] [Remoting] - Address [akka.tcp://JobServer@127.0.0.1:46575] is now quarantined, all messages to this address will be delivered to dead letters. 
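At this point the context has failed to initialize twice in a row (once with the YARN error, once with the NullPointerException above). For reference, the spark-jobserver REST API exposed by this server on the configured port 8090 (see "port" : 8090 in the config dump) includes GET /contexts, which lists the contexts the server currently knows about. A minimal Scala sketch of such a check, assuming the server is reachable on localhost; the object name is purely illustrative and not part of this log:

import scala.io.Source

object ListJobServerContexts {
  def main(args: Array[String]): Unit = {
    // GET /contexts returns a JSON array of context names, e.g. ["knimeSparkContext"]
    val contexts = Source.fromURL("http://localhost:8090/contexts").mkString
    println(contexts)
  }
}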
[2018-03-30 13:09:58,054] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting context with actor name jobManager-03-8d56-45427a184db7 [2018-03-30 13:09:58,054] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Created working directory /var/log/spark-job-server/jobserver-knimeSparkContext4434709240598613602 for context knimeSparkContext [2018-03-30 13:09:58,055] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting to execute sub process [./manager_start.sh, /var/log/spark-job-server/jobserver-knimeSparkContext4434709240598613602, akka.tcp://JobServer@127.0.0.1:33301] [2018-03-30 13:10:00,170] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Node [akka.tcp://JobServer@127.0.0.1:43599] is JOINING, roles [manager] [2018-03-30 13:10:00,286] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Leader is moving node [akka.tcp://JobServer@127.0.0.1:43599] to [Up] [2018-03-30 13:10:00,319] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Received identify response, attempting to initialize context at akka.tcp://JobServer@127.0.0.1:43599/user/* [2018-03-30 13:10:00,320] INFO k.jobserver.JobResultActor [] [akka://JobServer/user/context-supervisor/$x] - Starting actor spark.jobserver.JobResultActor [2018-03-30 13:10:08,345] INFO AkkaClusterSupervisorActor [] [] - SparkContext knimeSparkContext joined [2018-03-30 13:10:40,292] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Shutting down context knimeSparkContext [2018-03-30 13:10:40,292] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Marking node [akka.tcp://JobServer@127.0.0.1:43599] as [Down] [2018-03-30 13:10:40,479] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Actor terminated: jobManager-03-8d56-45427a184db7 [2018-03-30 13:10:40,489] ERROR akka.remote.EndpointWriter [] [akka://JobServer/system/endpointManager/reliableEndpointWriter-akka.tcp%3A%2F%2FJobServer%40127.0.0.1%3A43599-23/endpointWriter] - AssociationError [akka.tcp://JobServer@127.0.0.1:33301] <- [akka.tcp://JobServer@127.0.0.1:43599]: Error [Shut down address: akka.tcp://JobServer@127.0.0.1:43599] [ akka.remote.ShutDownAssociation: Shut down address: akka.tcp://JobServer@127.0.0.1:43599 Caused by: akka.remote.transport.Transport$InvalidAssociationException: The remote system terminated the association because it is shutting down. ] [2018-03-30 13:10:41,277] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Leader is removing unreachable node [akka.tcp://JobServer@127.0.0.1:43599] [2018-03-30 13:10:41,277] INFO Remoting [] [Remoting] - Address [akka.tcp://JobServer@127.0.0.1:43599] is now quarantined, all messages to this address will be delivered to dead letters. 
[2018-03-30 13:10:42,873] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting context with actor name jobManager-5e-aab9-053d08a70b40 [2018-03-30 13:10:42,873] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Created working directory /var/log/spark-job-server/jobserver-knimeSparkContext7500994062518208840 for context knimeSparkContext [2018-03-30 13:10:42,874] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting to execute sub process [./manager_start.sh, /var/log/spark-job-server/jobserver-knimeSparkContext7500994062518208840, akka.tcp://JobServer@127.0.0.1:33301] [2018-03-30 13:10:45,063] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Node [akka.tcp://JobServer@127.0.0.1:38755] is JOINING, roles [manager] [2018-03-30 13:10:45,286] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Leader is moving node [akka.tcp://JobServer@127.0.0.1:38755] to [Up] [2018-03-30 13:10:45,335] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Received identify response, attempting to initialize context at akka.tcp://JobServer@127.0.0.1:38755/user/* [2018-03-30 13:10:45,336] INFO k.jobserver.JobResultActor [] [akka://JobServer/user/context-supervisor/$y] - Starting actor spark.jobserver.JobResultActor [2018-03-30 13:11:17,448] INFO AkkaClusterSupervisorActor [] [] - Failed to initialize context Actor[akka.tcp://JobServer@127.0.0.1:38755/user/jobManager-5e-aab9-053d08a70b40#-1264186090] java.lang.NullPointerException at org.apache.spark.SparkContext.<init>(SparkContext.scala:592) at spark.jobserver.context.DefaultSparkContextFactory$$anon$1.<init>(SparkContextFactory.scala:53) at spark.jobserver.context.DefaultSparkContextFactory.makeContext(SparkContextFactory.scala:53) at spark.jobserver.context.DefaultSparkContextFactory.makeContext(SparkContextFactory.scala:48) at spark.jobserver.context.SparkContextFactory$class.makeContext(SparkContextFactory.scala:37) at spark.jobserver.context.DefaultSparkContextFactory.makeContext(SparkContextFactory.scala:48) at spark.jobserver.JobManagerActor.createContextFromConfig(JobManagerActor.scala:386) at spark.jobserver.JobManagerActor$$anonfun$wrappedReceive$1.applyOrElse(JobManagerActor.scala:129) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25) at ooyala.common.akka.ActorStack$$anonfun$receive$1.applyOrElse(ActorStack.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25) at ooyala.common.akka.Slf4jLogging$$anonfun$receive$1$$anonfun$applyOrElse$1.apply$mcV$sp(Slf4jLogging.scala:26) at ooyala.common.akka.Slf4jLogging$class.ooyala$common$akka$Slf4jLogging$$withAkkaSourceLogging(Slf4jLogging.scala:35) at ooyala.common.akka.Slf4jLogging$$anonfun$receive$1.applyOrElse(Slf4jLogging.scala:25) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33) at
scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25) at ooyala.common.akka.ActorMetrics$$anonfun$receive$1.applyOrElse(ActorMetrics.scala:24) at akka.actor.ActorCell.receiveMessage(ActorCell.scala:498) at akka.actor.ActorCell.invoke(ActorCell.scala:456) at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:237) at akka.dispatch.Mailbox.run(Mailbox.scala:219) at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:386) at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260) at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339) at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979) at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107) [2018-03-30 13:11:17,452] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Marking node [akka.tcp://JobServer@127.0.0.1:38755] as [Down] [2018-03-30 13:11:17,510] ERROR akka.remote.EndpointWriter [] [akka://JobServer/system/endpointManager/reliableEndpointWriter-akka.tcp%3A%2F%2FJobServer%40127.0.0.1%3A38755-24/endpointWriter] - AssociationError [akka.tcp://JobServer@127.0.0.1:33301] <- [akka.tcp://JobServer@127.0.0.1:38755]: Error [Shut down address: akka.tcp://JobServer@127.0.0.1:38755] [ akka.remote.ShutDownAssociation: Shut down address: akka.tcp://JobServer@127.0.0.1:38755 Caused by: akka.remote.transport.Transport$InvalidAssociationException: The remote system terminated the association because it is shutting down. ] [2018-03-30 13:11:18,276] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Leader is removing unreachable node [akka.tcp://JobServer@127.0.0.1:38755] [2018-03-30 13:11:18,276] INFO Remoting [] [Remoting] - Address [akka.tcp://JobServer@127.0.0.1:38755] is now quarantined, all messages to this address will be delivered to dead letters. 
[2018-03-30 13:19:42,012] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting context with actor name jobManager-52-af22-4c27a7e212bf [2018-03-30 13:19:42,012] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Created working directory /var/log/spark-job-server/jobserver-knimeSparkContext1333384047809919411 for context knimeSparkContext [2018-03-30 13:19:42,013] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting to execute sub process [./manager_start.sh, /var/log/spark-job-server/jobserver-knimeSparkContext1333384047809919411, akka.tcp://JobServer@127.0.0.1:33301] [2018-03-30 13:19:44,155] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Node [akka.tcp://JobServer@127.0.0.1:35871] is JOINING, roles [manager] [2018-03-30 13:19:44,287] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Leader is moving node [akka.tcp://JobServer@127.0.0.1:35871] to [Up] [2018-03-30 13:19:44,366] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Received identify response, attempting to initialize context at akka.tcp://JobServer@127.0.0.1:35871/user/* [2018-03-30 13:19:44,367] INFO k.jobserver.JobResultActor [] [akka://JobServer/user/context-supervisor/$z] - Starting actor spark.jobserver.JobResultActor [2018-03-30 13:19:57,743] INFO AkkaClusterSupervisorActor [] [] - Failed to initialize context Actor[akka.tcp://JobServer@127.0.0.1:35871/user/jobManager-52-af22-4c27a7e212bf#230869646] org.apache.spark.SparkException: Yarn application has already ended! It might have been killed or unable to launch application master. 
at org.apache.spark.scheduler.cluster.YarnClientSchedulerBackend.waitForApplication(YarnClientSchedulerBackend.scala:124) at org.apache.spark.scheduler.cluster.YarnClientSchedulerBackend.start(YarnClientSchedulerBackend.scala:64) at org.apache.spark.scheduler.TaskSchedulerImpl.start(TaskSchedulerImpl.scala:151) at org.apache.spark.SparkContext.<init>(SparkContext.scala:538) at spark.jobserver.context.DefaultSparkContextFactory$$anon$1.<init>(SparkContextFactory.scala:53) at spark.jobserver.context.DefaultSparkContextFactory.makeContext(SparkContextFactory.scala:53) at spark.jobserver.context.DefaultSparkContextFactory.makeContext(SparkContextFactory.scala:48) at spark.jobserver.context.SparkContextFactory$class.makeContext(SparkContextFactory.scala:37) at spark.jobserver.context.DefaultSparkContextFactory.makeContext(SparkContextFactory.scala:48) at spark.jobserver.JobManagerActor.createContextFromConfig(JobManagerActor.scala:386) at spark.jobserver.JobManagerActor$$anonfun$wrappedReceive$1.applyOrElse(JobManagerActor.scala:129) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25) at ooyala.common.akka.ActorStack$$anonfun$receive$1.applyOrElse(ActorStack.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25) at ooyala.common.akka.Slf4jLogging$$anonfun$receive$1$$anonfun$applyOrElse$1.apply$mcV$sp(Slf4jLogging.scala:26) at ooyala.common.akka.Slf4jLogging$class.ooyala$common$akka$Slf4jLogging$$withAkkaSourceLogging(Slf4jLogging.scala:35) at ooyala.common.akka.Slf4jLogging$$anonfun$receive$1.applyOrElse(Slf4jLogging.scala:25) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25) at ooyala.common.akka.ActorMetrics$$anonfun$receive$1.applyOrElse(ActorMetrics.scala:24) at akka.actor.ActorCell.receiveMessage(ActorCell.scala:498) at akka.actor.ActorCell.invoke(ActorCell.scala:456) at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:237) at akka.dispatch.Mailbox.run(Mailbox.scala:219) at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:386) at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260) at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339) at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979) at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107) [2018-03-30 13:19:57,744] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Marking node [akka.tcp://JobServer@127.0.0.1:35871] as [Down] [2018-03-30 13:19:57,816] ERROR akka.remote.EndpointWriter [] [akka://JobServer/system/endpointManager/reliableEndpointWriter-akka.tcp%3A%2F%2FJobServer%40127.0.0.1%3A35871-25/endpointWriter] - AssociationError [akka.tcp://JobServer@127.0.0.1:33301] <- [akka.tcp://JobServer@127.0.0.1:35871]: Error [Shut down address: akka.tcp://JobServer@127.0.0.1:35871] [
akka.remote.ShutDownAssociation: Shut down address: akka.tcp://JobServer@127.0.0.1:35871 Caused by: akka.remote.transport.Transport$InvalidAssociationException: The remote system terminated the association because it is shutting down. ] [2018-03-30 13:19:58,287] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:33301] - Leader is removing unreachable node [akka.tcp://JobServer@127.0.0.1:35871] [2018-03-30 13:19:58,287] INFO Remoting [] [Remoting] - Address [akka.tcp://JobServer@127.0.0.1:35871] is now quarantined, all messages to this address will be delivered to dead letters. BUILD-VERSION: Commit 0abe49552a6fcdf6a5aca565d624cda255342c33 on branch 0.6.2-KNIME_cdh-5.13 built on 2017-12-06 18:06:16+01:00 [2018-03-30 13:37:49,671] INFO spark.jobserver.JobServer$ [] [] - Starting JobServer with config { # system properties "ui" : { # system properties "enabled" : "true", # system properties "killEnabled" : "true" }, # system properties "app" : { # system properties "name" : "spark.jobserver.JobServer" }, # application.conf: 5 # spark web UI port "webUrlPort" : 8080, # system properties "submit" : { # system properties "deployMode" : "client" }, # system properties "serializer" : "org.apache.spark.serializer.KryoSerializer", # system properties "lineage" : { # system properties "enabled" : "false", # system properties "log" : { # system properties "dir" : "/var/log/spark/lineage" } }, # system properties "executor" : { # system properties "extraLibraryPath" : "/usr/lib/hadoop/lib/native", # system properties "uri" : "", # system properties "extraJavaOptions" : "-Dlog4j.configuration=file:/opt/spark-job-server/log4j-server.properties -DLOG_DIR=/var/log/spark-job-server" }, # merge of /opt/spark-job-server/environment.conf: 25,application.conf: 7 # ######################################################################### # Job server settings # ######################################################################### "jobserver" : { # /opt/spark-job-server/environment.conf: 27 # TCP port that the job server listens on for HTTP requests "port" : 8090, # merge of /opt/spark-job-server/environment.conf: 37,application.conf: 40 "sqldao" : { # /opt/spark-job-server/environment.conf: 38 "rootdir" : "/tmp/spark-job-server/sqldao/data", # application.conf: 42 # Slick database driver, full classpath "slick-driver" : "scala.slick.driver.H2Driver", # /opt/spark-job-server/environment.conf: 41 "dbcp" : { # /opt/spark-job-server/environment.conf: 45 "maxidle" : 10, # /opt/spark-job-server/environment.conf: 43 "maxactive" : 20, # /opt/spark-job-server/environment.conf: 42 "connectonstart" : true, # /opt/spark-job-server/environment.conf: 44 "minidle" : 1, # /opt/spark-job-server/environment.conf: 46 "initialsize" : 10 }, # application.conf: 45 # JDBC driver, full classpath "jdbc-driver" : "org.h2.Driver", # merge of /opt/spark-job-server/environment.conf: 39,application.conf: 52 # Full JDBC URL / init string, along with username and password. Sorry, needs to match above. 
# Substitutions may be used to launch job-server, but leave it out here in the default or tests won't pass "jdbc" : { # application.conf: 55 "password" : "", # application.conf: 54 "user" : "", # /opt/spark-job-server/environment.conf: 39 "url" : "jdbc:h2:/tmp/spark-job-server/sqldao/data/h2-db;AUTO_SERVER=TRUE;AUTO_RECONNECT=TRUE" } }, # application.conf: 87 # spark broadcst factory in yarn deployment # Versions prior to 1.1.0, spark default broadcast factory is org.apache.spark.broadcast.HttpBroadcastFactory. # Can't start multiple sparkContexts in the same JVM with HttpBroadcastFactory. "yarn-broadcast-factory" : "org.apache.spark.broadcast.TorrentBroadcastFactory", # /opt/spark-job-server/environment.conf: 52 # Storage directory for files that are uploaded to the server # via POST/data commands "datadao" : { # /opt/spark-job-server/environment.conf: 53 "rootdir" : "/tmp/spark-job-server/upload" }, # application.conf: 9 "bind-address" : "0.0.0.0", # /opt/spark-job-server/environment.conf: 65 # Timeouts for Spark context creation in seconds. In "yarn-client" mode, use # the first option, in all other modes, use the second one. If context # creation takes longer than the timeout, the jobserver closes the context. # Especially in yarn-client mode, context creation includes container # allocation, which can take a while. "yarn-context-creation-timeout" : "60 s", # /opt/spark-job-server/environment.conf: 66 "context-creation-timeout" : "60 s", # application.conf: 69 # The ask pattern timeout for Api "short-timeout" : "3 s", # /opt/spark-job-server/environment.conf: 58 # Number of jobs that can be run simultaneously per context. If not set, # defaults to number of cores on machine where Spark job server is running. "max-jobs-per-context" : 100, # /opt/spark-job-server/environment.conf: 36 # Class to use to persist data such as jars, applications, jobs, etc. # spark.jobserver.io.JobFileDAO uses the file system for persistence # spark.jobserver.io.JobSqlDAO uses an SQL database for persistence # "jobdao" : "spark.jobserver.io.JobSqlDAO", # /opt/spark-job-server/environment.conf: 30 # Directory where the job server stores uploaded jar files "jar-store-rootdir" : "/tmp/spark-job-server/jars", # application.conf: 24 "filedao" : { # application.conf: 25 "rootdir" : "/tmp/spark-jobserver/filedao/data" }, # application.conf: 15 # Number of job results to keep per JobResultActor/context "job-result-cache-size" : 5000, # /opt/spark-job-server/environment.conf: 71 # If true, a separate JVM is forked for each Spark context. # KERBEROS NOTE: In a Kerberos-enabled environment, you should set this to true # (as well as shiro authentication, see later in this file). "context-per-jvm" : true, # application.conf: 75 # Time out for job server to wait while creating named objects "named-object-creation-timeout" : "60 s" }, # merge of /opt/spark-job-server/environment.conf: 90,application.conf: 103 # Default settings for Spark contexts. These settings can be overridden on a # per-context basis. Please consult the Spark documentation for more details # on available settings. # Default settings for ad hoc as well as manually created contexts # You can add any Spark config params here, for example, spark.mesos.coarse = true "context-settings" : { # application.conf: 111 "streaming" : { # application.conf: 116 # if true, stops gracefully by waiting for the processing of all received data to be completed "stopGracefully" : true, # application.conf: 120 # if true, stops the SparkContext with the StreamingContext. 
The underlying SparkContext will be # stopped regardless of whether the StreamingContext has been started. "stopSparkContext" : true, # application.conf: 113 # Default batch interval for Spark Streaming contexts in milliseconds "batch_interval" : 1000 }, # application.conf: 127 # Timeout for SupervisorActor to wait for forked (separate JVM) contexts to initialize "context-init-timeout" : "60s", # application.conf: 109 # A zero-arg class implementing spark.jobserver.context.SparkContextFactory # Determines the type of jobs that can run in a SparkContext "context-factory" : "spark.jobserver.context.DefaultSparkContextFactory", # application.conf: 129 "passthrough" : { # application.conf: 130 "spark" : { # application.conf: 130 "driver" : { # application.conf: 130 "allowMultipleContexts" : true } } }, # /opt/spark-job-server/environment.conf: 106 # Required setting, that sets "spark.executor.memory" in the SparkConf. Sets # the amount of memory to use per Spark executor process, in the same format # as JVM memory strings (e.g. 512m, 2g). # # Note: In "yarn-client" mode, Spark will request this amount of memory per YARN # container, plus some additional overhead. "memory-per-node" : "1G", # /opt/spark-job-server/environment.conf: 98 # Required setting, that sets "spark.cores.max" in the SparkConf. This sets # the maximum amount of CPU cores to request for the Spark on the cluster # (not from each machine). # # IMPORTANT: Note that although required by job-server, this setting only # has an effect in Standalone and Mesos clusters. "num-cpu-cores" : 2 }, # /opt/spark-job-server/environment.conf: 82 # Sets the "spark.master" property in the SparkConf. KNIME recommends # "yarn-client" for production use and "local[4]" for debugging purposes. # master = "local[4]" # Run Spark locally with 4 worker threads "master" : "yarn-client", # system properties "dynamicAllocation" : { # system properties "enabled" : "true", # system properties "minExecutors" : "0", # system properties "executorIdleTimeout" : "60", # system properties "schedulerBacklogTimeout" : "1" }, # system properties "authenticate" : { # system properties "enableSaslEncryption" : "false" }, # system properties "sql" : { # system properties "queryExecutionListeners" : "com.cloudera.spark.lineage.ClouderaNavigatorListener" }, # system properties "shuffle" : { # system properties "encryption" : { # system properties "enabled" : "false" }, # system properties "service" : { # system properties "port" : "7337", # system properties "enabled" : "true" } }, # system properties "eventLog" : { # system properties "enabled" : "true", # system properties "dir" : "hdfs://ip-172-31-34-104.us-west-2.compute.internal:8020/user/spark/applicationHistory" }, # system properties "driver" : { # system properties "extraLibraryPath" : "/usr/lib/hadoop/lib/native", # system properties "extraJavaOptions" : "-XX:+UseConcMarkSweepGC\n -verbose:gc -XX:+PrintGCTimeStamps -Xloggc:/opt/spark-job-server/gc.out\n -XX:MaxPermSize=512m\n -XX:+CMSClassUnloadingEnabled -XX:MaxDirectMemorySize=512M -XX:+HeapDumpOnOutOfMemoryError -Djava.net.preferIPv4Stack=true -Dlog4j.configuration=file:/opt/spark-job-server/log4j-server.properties -DLOG_DIR=/var/log/spark-job-server -Dspark.executor.uri= ", # system properties "memory" : "2G" }, # application.conf: 93 # predefined Spark contexts # Below is an example, but do not uncomment it. Everything defined here is carried over to # deploy-time configs, so they will be created in all environments. 
:( "contexts" : {}, # system properties "extraListeners" : "com.cloudera.spark.lineage.ClouderaNavigatorListener", # system properties "yarn" : { # system properties "am" : { # system properties "extraLibraryPath" : "/usr/lib/hadoop/lib/native" }, # system properties "jar" : "local:/usr/lib/spark/lib/spark-assembly.jar", # system properties "historyServer" : { # system properties "allowTracking" : "true", # system properties "address" : "http://ip-172-31-34-104.us-west-2.compute.internal:18088" } }, # system properties "jars" : "file:/opt/spark-job-server/spark-job-server.jar" } [2018-03-30 13:37:49,674] INFO spark.jobserver.JobServer$ [] [] - Spray config: { # reference.conf: 1478 # After a `Timedout` message has been sent to the timeout handler and the # request still hasn't been completed after the time period set here # the server will complete the request itself with an error response. # Set to `infinite` to disable timeout timeouts. "timeout-timeout" : "2 s", # application.conf: 187 # ssl engine provider protocols "enabledProtocols" : [ # application.conf: 187 "SSLv3", # application.conf: 187 "TLSv1" ], # application.conf: 190 "pipelining-limit" : 2, # reference.conf: 1575 # The time period within which a connection handler must have been # registered after the bind handler has received a `Connected` event. # Set to `infinite` to disable. "registration-timeout" : "1s", # /opt/spark-job-server/environment.conf: 154 # parsing { # max-content-length = 200m # } "request-timeout" : "60 s", # reference.conf: 1600 "back-pressure" : { # reference.conf: 1604 # The reciprocal rate of requested Acks per NoAcks. E.g. the default value # '10' means that every 10th write request is acknowledged. This affects the # number of writes each connection has to buffer even in absence of back-pressure. "noack-rate" : 10, # reference.conf: 1609 # The lower limit the write queue size has to shrink to before reads are resumed. # Use 'infinite' to disable the low-watermark so that reading is resumed instantly # after the next successful write. "reading-low-watermark" : "infinite" }, # reference.conf: 1570 # The time period within which the TCP unbinding process must be completed. # Set to `infinite` to disable. "unbind-timeout" : "1s", # reference.conf: 1500 # Enables/disables support for statistics collection and querying. # Even though stats keeping overhead is small, # for maximum performance switch off when not needed. "stats-support" : "on", # reference.conf: 1490 # The path of the actor to send `spray.http.Timedout` messages to. # If empty all `Timedout` messages will go to the "regular" request # handling actor. "timeout-handler" : "", # /opt/spark-job-server/environment.conf: 155 "idle-timeout" : "120 s", # /opt/spark-job-server/environment.conf: 156 "request-chunk-aggregation-limit" : "200m", # reference.conf: 1449 # Enables/disables SSL encryption. # If enabled the server uses the implicit `ServerSSLEngineProvider` member # of the `Bind` command to create `SSLEngine` instances for the underlying # IO connection. "ssl-encryption" : "off", # application.conf: 192 # for maximum performance (prevents StopReading / ResumeReading messages to the IOBridge) # Needed for HTTP/1.0 requests with missing Host headers "default-host-header" : "spray.io:8765", # reference.conf: 1504 # Enables/disables the addition of a `Remote-Address` header # holding the clients (remote) IP address. 
"remote-address-header" : "off", # reference.conf: 1584 # The time after which a connection is aborted (RST) after a parsing error # occurred. The timeout prevents a connection which is already known to be # erroneous from receiving evermore data even if all of the data will be ignored. # However, in case of a connection abortion the client usually doesn't properly # receive the error response. This timeout is a trade-off which allows the client # some time to finish its request and receive a proper error response before the # connection is forcibly closed to free resources. "parsing-error-abort-timeout" : "2s", # merge of application.conf: 195,reference.conf: 1750 # The (default) configuration of the HTTP message parser for the server and # the client. # IMPORTANT: These settings (i.e. children of `spray.can.parsing`) can't be directly # overridden in `application.conf` to change the parser settings for client and server # altogether (see https://github.com/spray/spray/issues/346). Instead, override the # concrete settings beneath `spray.can.server.parsing` and `spray.can.client.parsing` # where these settings are copied to. "parsing" : { # reference.conf: 1758 "max-chunk-ext-length" : 256, # reference.conf: 1759 "max-chunk-size" : "1m", # application.conf: 195 # Increase this in order to upload bigger job jars "max-content-length" : "30m", # reference.conf: 1756 "max-header-count" : 64, # reference.conf: 1773 # Sets the strictness mode for parsing request target URIs. # The following values are defined: # # `strict`: RFC3986-compliant URIs are required, # a 400 response is triggered on violations # # `relaxed`: all visible 7-Bit ASCII chars are allowed # # `relaxed-with-raw-query`: like `relaxed` but additionally # the URI query is not parsed, but delivered as one raw string # as the `key` value of a single Query structure element. # "uri-parsing-mode" : "strict", # reference.conf: 1753 "max-response-reason-length" : 64, # reference.conf: 1783 # Enables/disables the logging of warning messages in case an incoming # message (request or response) contains an HTTP header which cannot be # parsed into its high-level model class due to incompatible syntax. # Note that, independently of this settings, spray will accept messages # with such headers as long as the message as a whole would still be legal # under the HTTP specification even without this header. # If a header cannot be parsed into a high-level model instance it will be # provided as a `RawHeader`. "illegal-header-warnings" : "on", # reference.conf: 1755 "max-header-value-length" : "8k", # reference.conf: 1787 # limits for the number of different values per header type that the # header cache will hold "header-cache" : { # reference.conf: 1792 "If-Modified-Since" : 0, # reference.conf: 1794 "If-Range" : 0, # reference.conf: 1788 "default" : 12, # reference.conf: 1790 "Date" : 0, # reference.conf: 1789 "Content-MD5" : 0, # reference.conf: 1796 "User-Agent" : 32, # reference.conf: 1795 "If-Unmodified-Since" : 0, # reference.conf: 1793 "If-None-Match" : 0, # reference.conf: 1791 "If-Match" : 0 }, # reference.conf: 1752 # The limits for the various parts of the HTTP message parser. "max-uri-length" : "2k", # reference.conf: 1754 "max-header-name-length" : 64, # reference.conf: 1807 # Enables/disables inclusion of an SSL-Session-Info header in parsed # messages over SSL transports (i.e., HttpRequest on server side and # HttpResponse on client side). 
"ssl-session-info-header" : "off", # reference.conf: 1802 # Sets the size starting from which incoming http-messages will be delivered # in chunks regardless of whether chunking is actually used on the wire. # Set to infinite to disable auto chunking. "incoming-auto-chunking-threshold-size" : "infinite" }, # reference.conf: 1566 # The time period within which the TCP binding process must be completed. # Set to `infinite` to disable. "bind-timeout" : "1s", # reference.conf: 1536 # Enables/disables the returning of more detailed error messages to # the client in the error response. # Should be disabled for browser-facing APIs due to the risk of XSS attacks # and (probably) enabled for internal or non-browser APIs. # Note that spray will always produce log messages containing the full # error details. "verbose-error-messages" : "off", # reference.conf: 1516 # Enables/disables automatic handling of HEAD requests. # If this setting is enabled the server dispatches HEAD requests as GET # requests to the application and automatically strips off all message # bodies from outgoing responses. # Note that, even when this setting is off the server will never send # out message bodies on responses to HEAD requests. "transparent-head-requests" : "on", # application.conf: 183 "keystoreType" : "JKS", # reference.conf: 1541 # Enables/disables the logging of the full (potentially multiple line) # error message to the server logs. # If disabled only a single line will be logged. "verbose-error-logging" : "off", # reference.conf: 1598 # Enables/disables automatic back-pressure handling by write buffering and # receive throttling "automatic-back-pressure-handling" : "on", # reference.conf: 1443-2058 # Always contains the deployed version of spray. # Referenced, for example, from the `spray.can.server.server-header` setting. "server-header" : "spray-can/1.2.3", # application.conf: 185 # key manager factory provider "provider" : "SunX509", # reference.conf: 1613 # Enables more verbose DEBUG logging for debugging SSL related issues. "ssl-tracing" : "off", # reference.conf: 1562 # For HTTPS connections this setting specified the maximum number of # bytes that are encrypted in one go. Large responses are broken down in # chunks of this size so as to already begin sending before the response has # been encrypted entirely. "max-encryption-chunk-size" : "1m", # reference.conf: 1508 # Enables/disables the addition of a `Raw-Request-URI` header holding the # original raw request URI as the client has sent it. "raw-request-uri-header" : "off", # application.conf: 182 # see http://docs.oracle.com/javase/7/docs/technotes/guides/security/StandardNames.html#SSLContext for more examples # typical are either SSL or TLS "encryptionType" : "SSL", # reference.conf: 1485 # The period during which a service must respond to a `ChunkedRequestStart` message # with a `RegisterChunkHandler` message. During the registration period reading from # the network is suspended. It is still possible that some chunks have already been # received which will be buffered until the registration is received or the timeout is # triggered. If the timeout is triggered the connection is immediately aborted. "chunkhandler-registration-timeout" : "500 ms", # reference.conf: 1556 # The initial size if the buffer to render the response headers in. # Can be used for fine-tuning response rendering performance but probably # doesn't have to be fiddled with in most applications. 
"response-header-size-hint" : 512, # reference.conf: 1528 # Enables/disables an alternative response streaming mode that doesn't # use `Transfer-Encoding: chunked` but rather renders the individual # MessageChunks coming in from the application as parts of the original # response entity. # Enabling this mode causes all connections to be closed after a streaming # response has been finished since there is no other way to signal the # response end to the client. # Note that chunkless-streaming is implicitly enabled when streaming # responses to HTTP/1.0 clients (since they don't support # `Transfer-Encoding: chunked`) "chunkless-streaming" : "off", # reference.conf: 1495 # The "granularity" of timeout checking for both idle connections timeouts # as well as request timeouts, should rarely be needed to modify. # If set to `infinite` request and connection timeout checking is disabled. "reaping-cycle" : "250 ms" } [2018-03-30 13:37:49,927] INFO ka.event.slf4j.Slf4jLogger [] [] - Slf4jLogger started [2018-03-30 13:37:50,045] INFO Remoting [] [Remoting] - Starting remoting [2018-03-30 13:37:50,187] INFO Remoting [] [Remoting] - Remoting started; listening on addresses :[akka.tcp://JobServer@127.0.0.1:38864] [2018-03-30 13:37:50,188] INFO Remoting [] [Remoting] - Remoting now listens on addresses: [akka.tcp://JobServer@127.0.0.1:38864] [2018-03-30 13:37:50,209] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:38864] - Starting up... [2018-03-30 13:37:50,284] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:38864] - Registered cluster JMX MBean [akka:type=Cluster] [2018-03-30 13:37:50,289] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:38864] - Started up successfully [2018-03-30 13:37:50,292] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:38864] - No seed-nodes configured, manual cluster join required [2018-03-30 13:37:51,159] INFO ark.jobserver.io.JobSqlDAO [] [] - rootDir is /tmp/spark-job-server/sqldao/data [2018-03-30 13:37:51,250] INFO ternal.util.VersionPrinter [] [] - Flyway 3.2.1 by Boxfuse [2018-03-30 13:37:51,781] INFO dbsupport.DbSupportFactory [] [] - Database: jdbc:h2:/tmp/spark-job-server/sqldao/data/h2-db (H2 1.3) [2018-03-30 13:37:52,038] INFO nternal.command.DbValidate [] [] - Validated 1 migration (execution time 00:00.133s) [2018-03-30 13:37:52,059] INFO internal.command.DbMigrate [] [] - Current version of schema "PUBLIC": 0.7.0 [2018-03-30 13:37:52,059] INFO internal.command.DbMigrate [] [] - Schema "PUBLIC" is up to date. No migration necessary. [2018-03-30 13:37:52,102] INFO ark.jobserver.io.JobSqlDAO [] [] - Running dummy query to initialize dbcp. 
[2018-03-30 13:37:52,635] INFO k.jobserver.io.JobDAOActor [] [akka://JobServer/user/dao-manager] - Starting actor spark.jobserver.io.JobDAOActor [2018-03-30 13:37:52,675] INFO jobserver.DataManagerActor [] [akka://JobServer/user/data-manager] - Starting actor spark.jobserver.DataManagerActor [2018-03-30 13:37:52,676] INFO spark.jobserver.JarManager [] [akka://JobServer/user/jar-manager] - Starting actor spark.jobserver.JarManager [2018-03-30 13:37:52,678] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting actor spark.jobserver.AkkaClusterSupervisorActor [2018-03-30 13:37:52,680] INFO k.jobserver.JobResultActor [] [akka://JobServer/user/context-supervisor/global-result-actor] - Starting actor spark.jobserver.JobResultActor [2018-03-30 13:37:52,686] INFO AkkaClusterSupervisorActor [] [] - AkkaClusterSupervisor initialized on akka.tcp://JobServer@127.0.0.1:38864 [2018-03-30 13:37:52,698] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:38864] - Node [akka.tcp://JobServer@127.0.0.1:38864] is JOINING, roles [supervisor] [2018-03-30 13:37:52,699] INFO ark.jobserver.JobInfoActor [] [akka://JobServer/user/job-info] - Starting actor spark.jobserver.JobInfoActor [2018-03-30 13:37:52,958] INFO spark.jobserver.WebApi [] [] - No authentication. [2018-03-30 13:37:53,139] INFO spark.jobserver.WebApi [] [] - Starting browser web service... [2018-03-30 13:37:53,323] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:38864] - Leader is moving node [akka.tcp://JobServer@127.0.0.1:38864] to [Up] [2018-03-30 13:37:54,030] INFO ay.can.server.HttpListener [] [akka://JobServer/user/IO-HTTP/listener-0] - Bound to /0.0.0.0:8090 [2018-03-30 13:40:07,397] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting context with actor name jobManager-74-af56-d5f058bcf0fc [2018-03-30 13:40:07,407] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Created working directory /var/log/spark-job-server/jobserver-knimeSparkContext7464959709041012272 for context knimeSparkContext [2018-03-30 13:40:07,418] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting to execute sub process [./manager_start.sh, /var/log/spark-job-server/jobserver-knimeSparkContext7464959709041012272, akka.tcp://JobServer@127.0.0.1:38864] [2018-03-30 13:40:10,072] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:38864] - Node [akka.tcp://JobServer@127.0.0.1:44884] is JOINING, roles [manager] [2018-03-30 13:40:10,336] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:38864] - Leader is moving node [akka.tcp://JobServer@127.0.0.1:44884] to [Up] [2018-03-30 13:40:10,428] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Received identify response, attempting to initialize context at akka.tcp://JobServer@127.0.0.1:44884/user/* [2018-03-30 13:40:10,429] INFO k.jobserver.JobResultActor [] [akka://JobServer/user/context-supervisor/$a] - Starting actor spark.jobserver.JobResultActor [2018-03-30 13:40:21,427] INFO AkkaClusterSupervisorActor [] [] - SparkContext knimeSparkContext joined [2018-03-30 13:40:22,272] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [spark.jobserver.JobInfoActor$JobConfigStored$] from 
Actor[akka://JobServer/user/job-info#105115790] to Actor[akka://JobServer/deadLetters] was not delivered. [1] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. [2018-03-30 13:40:23,931] INFO jobserver.DataManagerActor [] [akka://JobServer/user/data-manager] - Storing data in file prefix mysql-connector-java-5.1.39-bin.jar, 989497 bytes [2018-03-30 13:40:24,065] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [spark.jobserver.JobInfoActor$JobConfigStored$] from Actor[akka://JobServer/user/job-info#105115790] to Actor[akka://JobServer/deadLetters] was not delivered. [2] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. [2018-03-30 13:40:26,815] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [spark.jobserver.JobInfoActor$JobConfigStored$] from Actor[akka://JobServer/user/job-info#105115790] to Actor[akka://JobServer/deadLetters] was not delivered. [3] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. [2018-03-30 13:40:32,503] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [spark.jobserver.JobInfoActor$JobConfigStored$] from Actor[akka://JobServer/user/job-info#105115790] to Actor[akka://JobServer/deadLetters] was not delivered. [4] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. [2018-03-30 13:40:33,858] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [spark.jobserver.JobInfoActor$JobConfigStored$] from Actor[akka://JobServer/user/job-info#105115790] to Actor[akka://JobServer/deadLetters] was not delivered. [5] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. [2018-03-30 13:40:35,222] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [spark.jobserver.JobInfoActor$JobConfigStored$] from Actor[akka://JobServer/user/job-info#105115790] to Actor[akka://JobServer/deadLetters] was not delivered. [6] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. [2018-03-30 13:40:36,522] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [spark.jobserver.JobInfoActor$JobConfigStored$] from Actor[akka://JobServer/user/job-info#105115790] to Actor[akka://JobServer/deadLetters] was not delivered. [7] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. [2018-03-30 13:40:36,812] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [spark.jobserver.JobInfoActor$JobConfigStored$] from Actor[akka://JobServer/user/job-info#105115790] to Actor[akka://JobServer/deadLetters] was not delivered. [8] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. 
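The dead-letter messages above name the Akka settings that control this logging: 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. These are generic Akka/Typesafe Config keys rather than anything specific to this job server. A minimal Scala sketch (illustrative only, not the job server's own code) that builds such a configuration with Typesafe Config and reads the values back:

import com.typesafe.config.ConfigFactory

object DeadLetterConfigSketch {
  def main(args: Array[String]): Unit = {
    // 0 disables dead-letter logging entirely; HOCON also accepts on/off as boolean values
    val cfg = ConfigFactory.parseString(
      "akka.log-dead-letters = 0\n" +
      "akka.log-dead-letters-during-shutdown = off")
    println(cfg.getInt("akka.log-dead-letters"))                      // prints 0
    println(cfg.getBoolean("akka.log-dead-letters-during-shutdown"))  // prints false
  }
}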
[2018-03-30 13:40:37,104] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [spark.jobserver.JobInfoActor$JobConfigStored$] from Actor[akka://JobServer/user/job-info#105115790] to Actor[akka://JobServer/deadLetters] was not delivered. [9] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. [2018-03-30 13:40:37,393] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [spark.jobserver.JobInfoActor$JobConfigStored$] from Actor[akka://JobServer/user/job-info#105115790] to Actor[akka://JobServer/deadLetters] was not delivered. [10] dead letters encountered, no more dead letters will be logged. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. BUILD-VERSION: Commit 0abe49552a6fcdf6a5aca565d624cda255342c33 on branch 0.6.2-KNIME_cdh-5.13 built on 2017-12-06 18:06:16+01:00 [2018-03-31 11:58:08,823] INFO spark.jobserver.JobServer$ [] [] - Starting JobServer with config { # system properties "ui" : { # system properties "enabled" : "true", # system properties "killEnabled" : "true" }, # system properties "app" : { # system properties "name" : "spark.jobserver.JobServer" }, # application.conf: 5 # spark web UI port "webUrlPort" : 8080, # system properties "submit" : { # system properties "deployMode" : "client" }, # system properties "serializer" : "org.apache.spark.serializer.KryoSerializer", # system properties "lineage" : { # system properties "enabled" : "false", # system properties "log" : { # system properties "dir" : "/var/log/spark/lineage" } }, # system properties "executor" : { # system properties "extraLibraryPath" : "/usr/lib/hadoop/lib/native", # system properties "uri" : "", # system properties "extraJavaOptions" : "-Dlog4j.configuration=file:/opt/spark-job-server/log4j-server.properties -DLOG_DIR=/var/log/spark-job-server" }, # merge of /opt/spark-job-server/environment.conf: 25,application.conf: 7 # ######################################################################### # Job server settings # ######################################################################### "jobserver" : { # /opt/spark-job-server/environment.conf: 27 # TCP port that the job server listens on for HTTP requests "port" : 8090, # merge of /opt/spark-job-server/environment.conf: 37,application.conf: 40 "sqldao" : { # /opt/spark-job-server/environment.conf: 38 "rootdir" : "/tmp/spark-job-server/sqldao/data", # application.conf: 42 # Slick database driver, full classpath "slick-driver" : "scala.slick.driver.H2Driver", # /opt/spark-job-server/environment.conf: 41 "dbcp" : { # /opt/spark-job-server/environment.conf: 45 "maxidle" : 10, # /opt/spark-job-server/environment.conf: 43 "maxactive" : 20, # /opt/spark-job-server/environment.conf: 42 "connectonstart" : true, # /opt/spark-job-server/environment.conf: 44 "minidle" : 1, # /opt/spark-job-server/environment.conf: 46 "initialsize" : 10 }, # application.conf: 45 # JDBC driver, full classpath "jdbc-driver" : "org.h2.Driver", # merge of /opt/spark-job-server/environment.conf: 39,application.conf: 52 # Full JDBC URL / init string, along with username and password. Sorry, needs to match above. 
# Substitutions may be used to launch job-server, but leave it out here in the default or tests won't pass "jdbc" : { # application.conf: 55 "password" : "", # application.conf: 54 "user" : "", # /opt/spark-job-server/environment.conf: 39 "url" : "jdbc:h2:/tmp/spark-job-server/sqldao/data/h2-db;AUTO_SERVER=TRUE;AUTO_RECONNECT=TRUE" } }, # application.conf: 87 # spark broadcst factory in yarn deployment # Versions prior to 1.1.0, spark default broadcast factory is org.apache.spark.broadcast.HttpBroadcastFactory. # Can't start multiple sparkContexts in the same JVM with HttpBroadcastFactory. "yarn-broadcast-factory" : "org.apache.spark.broadcast.TorrentBroadcastFactory", # /opt/spark-job-server/environment.conf: 52 # Storage directory for files that are uploaded to the server # via POST/data commands "datadao" : { # /opt/spark-job-server/environment.conf: 53 "rootdir" : "/tmp/spark-job-server/upload" }, # application.conf: 9 "bind-address" : "0.0.0.0", # /opt/spark-job-server/environment.conf: 65 # Timeouts for Spark context creation in seconds. In "yarn-client" mode, use # the first option, in all other modes, use the second one. If context # creation takes longer than the timeout, the jobserver closes the context. # Especially in yarn-client mode, context creation includes container # allocation, which can take a while. "yarn-context-creation-timeout" : "60 s", # /opt/spark-job-server/environment.conf: 66 "context-creation-timeout" : "60 s", # application.conf: 69 # The ask pattern timeout for Api "short-timeout" : "3 s", # /opt/spark-job-server/environment.conf: 58 # Number of jobs that can be run simultaneously per context. If not set, # defaults to number of cores on machine where Spark job server is running. "max-jobs-per-context" : 100, # /opt/spark-job-server/environment.conf: 36 # Class to use to persist data such as jars, applications, jobs, etc. # spark.jobserver.io.JobFileDAO uses the file system for persistence # spark.jobserver.io.JobSqlDAO uses an SQL database for persistence # "jobdao" : "spark.jobserver.io.JobSqlDAO", # /opt/spark-job-server/environment.conf: 30 # Directory where the job server stores uploaded jar files "jar-store-rootdir" : "/tmp/spark-job-server/jars", # application.conf: 24 "filedao" : { # application.conf: 25 "rootdir" : "/tmp/spark-jobserver/filedao/data" }, # application.conf: 15 # Number of job results to keep per JobResultActor/context "job-result-cache-size" : 5000, # /opt/spark-job-server/environment.conf: 71 # If true, a separate JVM is forked for each Spark context. # KERBEROS NOTE: In a Kerberos-enabled environment, you should set this to true # (as well as shiro authentication, see later in this file). "context-per-jvm" : true, # application.conf: 75 # Time out for job server to wait while creating named objects "named-object-creation-timeout" : "60 s" }, # merge of /opt/spark-job-server/environment.conf: 90,application.conf: 103 # Default settings for Spark contexts. These settings can be overridden on a # per-context basis. Please consult the Spark documentation for more details # on available settings. # Default settings for ad hoc as well as manually created contexts # You can add any Spark config params here, for example, spark.mesos.coarse = true "context-settings" : { # application.conf: 111 "streaming" : { # application.conf: 116 # if true, stops gracefully by waiting for the processing of all received data to be completed "stopGracefully" : true, # application.conf: 120 # if true, stops the SparkContext with the StreamingContext. 
The underlying SparkContext will be # stopped regardless of whether the StreamingContext has been started. "stopSparkContext" : true, # application.conf: 113 # Default batch interval for Spark Streaming contexts in milliseconds "batch_interval" : 1000 }, # application.conf: 127 # Timeout for SupervisorActor to wait for forked (separate JVM) contexts to initialize "context-init-timeout" : "60s", # application.conf: 109 # A zero-arg class implementing spark.jobserver.context.SparkContextFactory # Determines the type of jobs that can run in a SparkContext "context-factory" : "spark.jobserver.context.DefaultSparkContextFactory", # application.conf: 129 "passthrough" : { # application.conf: 130 "spark" : { # application.conf: 130 "driver" : { # application.conf: 130 "allowMultipleContexts" : true } } }, # /opt/spark-job-server/environment.conf: 106 # Required setting, that sets "spark.executor.memory" in the SparkConf. Sets # the amount of memory to use per Spark executor process, in the same format # as JVM memory strings (e.g. 512m, 2g). # # Note: In "yarn-client" mode, Spark will request this amount of memory per YARN # container, plus some additional overhead. "memory-per-node" : "1G", # /opt/spark-job-server/environment.conf: 98 # Required setting, that sets "spark.cores.max" in the SparkConf. This sets # the maximum amount of CPU cores to request for the Spark on the cluster # (not from each machine). # # IMPORTANT: Note that although required by job-server, this setting only # has an effect in Standalone and Mesos clusters. "num-cpu-cores" : 2 }, # /opt/spark-job-server/environment.conf: 82 # Sets the "spark.master" property in the SparkConf. KNIME recommends # "yarn-client" for production use and "local[4]" for debugging purposes. # master = "local[4]" # Run Spark locally with 4 worker threads "master" : "yarn-client", # system properties "dynamicAllocation" : { # system properties "enabled" : "true", # system properties "minExecutors" : "0", # system properties "executorIdleTimeout" : "60", # system properties "schedulerBacklogTimeout" : "1" }, # system properties "authenticate" : { # system properties "enableSaslEncryption" : "false" }, # system properties "sql" : { # system properties "queryExecutionListeners" : "com.cloudera.spark.lineage.ClouderaNavigatorListener" }, # system properties "shuffle" : { # system properties "encryption" : { # system properties "enabled" : "false" }, # system properties "service" : { # system properties "port" : "7337", # system properties "enabled" : "true" } }, # system properties "eventLog" : { # system properties "enabled" : "true", # system properties "dir" : "hdfs://ip-172-31-34-104.us-west-2.compute.internal:8020/user/spark/applicationHistory" }, # system properties "driver" : { # system properties "extraLibraryPath" : "/usr/lib/hadoop/lib/native", # system properties "extraJavaOptions" : "-XX:+UseConcMarkSweepGC\n -verbose:gc -XX:+PrintGCTimeStamps -Xloggc:/opt/spark-job-server/gc.out\n -XX:MaxPermSize=512m\n -XX:+CMSClassUnloadingEnabled -XX:MaxDirectMemorySize=512M -XX:+HeapDumpOnOutOfMemoryError -Djava.net.preferIPv4Stack=true -Dlog4j.configuration=file:/opt/spark-job-server/log4j-server.properties -DLOG_DIR=/var/log/spark-job-server -Dspark.executor.uri= ", # system properties "memory" : "2G" }, # application.conf: 93 # predefined Spark contexts # Below is an example, but do not uncomment it. Everything defined here is carried over to # deploy-time configs, so they will be created in all environments. 
:( "contexts" : {}, # system properties "extraListeners" : "com.cloudera.spark.lineage.ClouderaNavigatorListener", # system properties "yarn" : { # system properties "am" : { # system properties "extraLibraryPath" : "/usr/lib/hadoop/lib/native" }, # system properties "jar" : "local:/usr/lib/spark/lib/spark-assembly.jar", # system properties "historyServer" : { # system properties "allowTracking" : "true", # system properties "address" : "http://ip-172-31-34-104.us-west-2.compute.internal:18088" } }, # system properties "jars" : "file:/opt/spark-job-server/spark-job-server.jar" } [2018-03-31 11:58:08,826] INFO spark.jobserver.JobServer$ [] [] - Spray config: { # reference.conf: 1478 # After a `Timedout` message has been sent to the timeout handler and the # request still hasn't been completed after the time period set here # the server will complete the request itself with an error response. # Set to `infinite` to disable timeout timeouts. "timeout-timeout" : "2 s", # application.conf: 187 # ssl engine provider protocols "enabledProtocols" : [ # application.conf: 187 "SSLv3", # application.conf: 187 "TLSv1" ], # application.conf: 190 "pipelining-limit" : 2, # reference.conf: 1575 # The time period within which a connection handler must have been # registered after the bind handler has received a `Connected` event. # Set to `infinite` to disable. "registration-timeout" : "1s", # /opt/spark-job-server/environment.conf: 154 # parsing { # max-content-length = 200m # } "request-timeout" : "60 s", # reference.conf: 1600 "back-pressure" : { # reference.conf: 1604 # The reciprocal rate of requested Acks per NoAcks. E.g. the default value # '10' means that every 10th write request is acknowledged. This affects the # number of writes each connection has to buffer even in absence of back-pressure. "noack-rate" : 10, # reference.conf: 1609 # The lower limit the write queue size has to shrink to before reads are resumed. # Use 'infinite' to disable the low-watermark so that reading is resumed instantly # after the next successful write. "reading-low-watermark" : "infinite" }, # reference.conf: 1570 # The time period within which the TCP unbinding process must be completed. # Set to `infinite` to disable. "unbind-timeout" : "1s", # reference.conf: 1500 # Enables/disables support for statistics collection and querying. # Even though stats keeping overhead is small, # for maximum performance switch off when not needed. "stats-support" : "on", # reference.conf: 1490 # The path of the actor to send `spray.http.Timedout` messages to. # If empty all `Timedout` messages will go to the "regular" request # handling actor. "timeout-handler" : "", # /opt/spark-job-server/environment.conf: 155 "idle-timeout" : "120 s", # /opt/spark-job-server/environment.conf: 156 "request-chunk-aggregation-limit" : "200m", # reference.conf: 1449 # Enables/disables SSL encryption. # If enabled the server uses the implicit `ServerSSLEngineProvider` member # of the `Bind` command to create `SSLEngine` instances for the underlying # IO connection. "ssl-encryption" : "off", # application.conf: 192 # for maximum performance (prevents StopReading / ResumeReading messages to the IOBridge) # Needed for HTTP/1.0 requests with missing Host headers "default-host-header" : "spray.io:8765", # reference.conf: 1504 # Enables/disables the addition of a `Remote-Address` header # holding the clients (remote) IP address. 
"remote-address-header" : "off", # reference.conf: 1584 # The time after which a connection is aborted (RST) after a parsing error # occurred. The timeout prevents a connection which is already known to be # erroneous from receiving evermore data even if all of the data will be ignored. # However, in case of a connection abortion the client usually doesn't properly # receive the error response. This timeout is a trade-off which allows the client # some time to finish its request and receive a proper error response before the # connection is forcibly closed to free resources. "parsing-error-abort-timeout" : "2s", # merge of application.conf: 195,reference.conf: 1750 # The (default) configuration of the HTTP message parser for the server and # the client. # IMPORTANT: These settings (i.e. children of `spray.can.parsing`) can't be directly # overridden in `application.conf` to change the parser settings for client and server # altogether (see https://github.com/spray/spray/issues/346). Instead, override the # concrete settings beneath `spray.can.server.parsing` and `spray.can.client.parsing` # where these settings are copied to. "parsing" : { # reference.conf: 1758 "max-chunk-ext-length" : 256, # reference.conf: 1759 "max-chunk-size" : "1m", # application.conf: 195 # Increase this in order to upload bigger job jars "max-content-length" : "30m", # reference.conf: 1756 "max-header-count" : 64, # reference.conf: 1773 # Sets the strictness mode for parsing request target URIs. # The following values are defined: # # `strict`: RFC3986-compliant URIs are required, # a 400 response is triggered on violations # # `relaxed`: all visible 7-Bit ASCII chars are allowed # # `relaxed-with-raw-query`: like `relaxed` but additionally # the URI query is not parsed, but delivered as one raw string # as the `key` value of a single Query structure element. # "uri-parsing-mode" : "strict", # reference.conf: 1753 "max-response-reason-length" : 64, # reference.conf: 1783 # Enables/disables the logging of warning messages in case an incoming # message (request or response) contains an HTTP header which cannot be # parsed into its high-level model class due to incompatible syntax. # Note that, independently of this settings, spray will accept messages # with such headers as long as the message as a whole would still be legal # under the HTTP specification even without this header. # If a header cannot be parsed into a high-level model instance it will be # provided as a `RawHeader`. "illegal-header-warnings" : "on", # reference.conf: 1755 "max-header-value-length" : "8k", # reference.conf: 1787 # limits for the number of different values per header type that the # header cache will hold "header-cache" : { # reference.conf: 1792 "If-Modified-Since" : 0, # reference.conf: 1794 "If-Range" : 0, # reference.conf: 1788 "default" : 12, # reference.conf: 1790 "Date" : 0, # reference.conf: 1789 "Content-MD5" : 0, # reference.conf: 1796 "User-Agent" : 32, # reference.conf: 1795 "If-Unmodified-Since" : 0, # reference.conf: 1793 "If-None-Match" : 0, # reference.conf: 1791 "If-Match" : 0 }, # reference.conf: 1752 # The limits for the various parts of the HTTP message parser. "max-uri-length" : "2k", # reference.conf: 1754 "max-header-name-length" : 64, # reference.conf: 1807 # Enables/disables inclusion of an SSL-Session-Info header in parsed # messages over SSL transports (i.e., HttpRequest on server side and # HttpResponse on client side). 
"ssl-session-info-header" : "off", # reference.conf: 1802 # Sets the size starting from which incoming http-messages will be delivered # in chunks regardless of whether chunking is actually used on the wire. # Set to infinite to disable auto chunking. "incoming-auto-chunking-threshold-size" : "infinite" }, # reference.conf: 1566 # The time period within which the TCP binding process must be completed. # Set to `infinite` to disable. "bind-timeout" : "1s", # reference.conf: 1536 # Enables/disables the returning of more detailed error messages to # the client in the error response. # Should be disabled for browser-facing APIs due to the risk of XSS attacks # and (probably) enabled for internal or non-browser APIs. # Note that spray will always produce log messages containing the full # error details. "verbose-error-messages" : "off", # reference.conf: 1516 # Enables/disables automatic handling of HEAD requests. # If this setting is enabled the server dispatches HEAD requests as GET # requests to the application and automatically strips off all message # bodies from outgoing responses. # Note that, even when this setting is off the server will never send # out message bodies on responses to HEAD requests. "transparent-head-requests" : "on", # application.conf: 183 "keystoreType" : "JKS", # reference.conf: 1541 # Enables/disables the logging of the full (potentially multiple line) # error message to the server logs. # If disabled only a single line will be logged. "verbose-error-logging" : "off", # reference.conf: 1598 # Enables/disables automatic back-pressure handling by write buffering and # receive throttling "automatic-back-pressure-handling" : "on", # reference.conf: 1443-2058 # Always contains the deployed version of spray. # Referenced, for example, from the `spray.can.server.server-header` setting. "server-header" : "spray-can/1.2.3", # application.conf: 185 # key manager factory provider "provider" : "SunX509", # reference.conf: 1613 # Enables more verbose DEBUG logging for debugging SSL related issues. "ssl-tracing" : "off", # reference.conf: 1562 # For HTTPS connections this setting specified the maximum number of # bytes that are encrypted in one go. Large responses are broken down in # chunks of this size so as to already begin sending before the response has # been encrypted entirely. "max-encryption-chunk-size" : "1m", # reference.conf: 1508 # Enables/disables the addition of a `Raw-Request-URI` header holding the # original raw request URI as the client has sent it. "raw-request-uri-header" : "off", # application.conf: 182 # see http://docs.oracle.com/javase/7/docs/technotes/guides/security/StandardNames.html#SSLContext for more examples # typical are either SSL or TLS "encryptionType" : "SSL", # reference.conf: 1485 # The period during which a service must respond to a `ChunkedRequestStart` message # with a `RegisterChunkHandler` message. During the registration period reading from # the network is suspended. It is still possible that some chunks have already been # received which will be buffered until the registration is received or the timeout is # triggered. If the timeout is triggered the connection is immediately aborted. "chunkhandler-registration-timeout" : "500 ms", # reference.conf: 1556 # The initial size if the buffer to render the response headers in. # Can be used for fine-tuning response rendering performance but probably # doesn't have to be fiddled with in most applications. 
"response-header-size-hint" : 512, # reference.conf: 1528 # Enables/disables an alternative response streaming mode that doesn't # use `Transfer-Encoding: chunked` but rather renders the individual # MessageChunks coming in from the application as parts of the original # response entity. # Enabling this mode causes all connections to be closed after a streaming # response has been finished since there is no other way to signal the # response end to the client. # Note that chunkless-streaming is implicitly enabled when streaming # responses to HTTP/1.0 clients (since they don't support # `Transfer-Encoding: chunked`) "chunkless-streaming" : "off", # reference.conf: 1495 # The "granularity" of timeout checking for both idle connections timeouts # as well as request timeouts, should rarely be needed to modify. # If set to `infinite` request and connection timeout checking is disabled. "reaping-cycle" : "250 ms" } [2018-03-31 11:58:09,383] INFO ka.event.slf4j.Slf4jLogger [] [] - Slf4jLogger started [2018-03-31 11:58:09,618] INFO Remoting [] [Remoting] - Starting remoting [2018-03-31 11:58:10,050] INFO Remoting [] [Remoting] - Remoting started; listening on addresses :[akka.tcp://JobServer@127.0.0.1:36424] [2018-03-31 11:58:10,050] INFO Remoting [] [Remoting] - Remoting now listens on addresses: [akka.tcp://JobServer@127.0.0.1:36424] [2018-03-31 11:58:10,110] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:36424] - Starting up... [2018-03-31 11:58:10,486] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:36424] - Registered cluster JMX MBean [akka:type=Cluster] [2018-03-31 11:58:10,491] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:36424] - Started up successfully [2018-03-31 11:58:10,533] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:36424] - No seed-nodes configured, manual cluster join required [2018-03-31 11:58:12,397] INFO ark.jobserver.io.JobSqlDAO [] [] - rootDir is /tmp/spark-job-server/sqldao/data [2018-03-31 11:58:12,706] INFO ternal.util.VersionPrinter [] [] - Flyway 3.2.1 by Boxfuse [2018-03-31 11:58:14,775] INFO dbsupport.DbSupportFactory [] [] - Database: jdbc:h2:/tmp/spark-job-server/sqldao/data/h2-db (H2 1.3) [2018-03-31 11:58:15,348] INFO nternal.command.DbValidate [] [] - Validated 1 migration (execution time 00:00.309s) [2018-03-31 11:58:15,375] INFO atatable.MetaDataTableImpl [] [] - Creating Metadata table: "PUBLIC"."schema_version" [2018-03-31 11:58:15,410] INFO internal.command.DbMigrate [] [] - Current version of schema "PUBLIC": << Empty Schema >> [2018-03-31 11:58:15,410] INFO internal.command.DbMigrate [] [] - Migrating schema "PUBLIC" to version 0.7.0 - init tables [2018-03-31 11:58:15,555] INFO internal.command.DbMigrate [] [] - Successfully applied 1 migration to schema "PUBLIC" (execution time 00:00.183s). [2018-03-31 11:58:15,776] INFO ark.jobserver.io.JobSqlDAO [] [] - Running dummy query to initialize dbcp. 
[2018-03-31 11:58:16,957] INFO spark.jobserver.JarManager [] [akka://JobServer/user/jar-manager] - Starting actor spark.jobserver.JarManager
[2018-03-31 11:58:16,970] INFO jobserver.DataManagerActor [] [akka://JobServer/user/data-manager] - Starting actor spark.jobserver.DataManagerActor
[2018-03-31 11:58:16,971] INFO k.jobserver.io.JobDAOActor [] [akka://JobServer/user/dao-manager] - Starting actor spark.jobserver.io.JobDAOActor
[2018-03-31 11:58:17,007] INFO ark.jobserver.JobInfoActor [] [akka://JobServer/user/job-info] - Starting actor spark.jobserver.JobInfoActor
[2018-03-31 11:58:17,007] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting actor spark.jobserver.AkkaClusterSupervisorActor
[2018-03-31 11:58:17,053] INFO AkkaClusterSupervisorActor [] [] - AkkaClusterSupervisor initialized on akka.tcp://JobServer@127.0.0.1:36424
[2018-03-31 11:58:17,054] INFO k.jobserver.JobResultActor [] [akka://JobServer/user/context-supervisor/global-result-actor] - Starting actor spark.jobserver.JobResultActor
[2018-03-31 11:58:17,094] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:36424] - Node [akka.tcp://JobServer@127.0.0.1:36424] is JOINING, roles [supervisor]
[2018-03-31 11:58:17,844] INFO spark.jobserver.WebApi [] [] - No authentication.
[2018-03-31 11:58:18,005] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:36424] - Leader is moving node [akka.tcp://JobServer@127.0.0.1:36424] to [Up]
[2018-03-31 11:58:18,605] INFO spark.jobserver.WebApi [] [] - Starting browser web service...
[2018-03-31 11:58:19,884] INFO ay.can.server.HttpListener [] [akka://JobServer/user/IO-HTTP/listener-0] - Bound to /0.0.0.0:8090
[2018-03-31 11:58:19,886] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [akka.io.Tcp$Bound] from Actor[akka://JobServer/user/IO-HTTP/listener-0#-330759308] to Actor[akka://JobServer/deadLetters] was not delivered. [1] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'.
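The 12:26 entries below show this port being driven over HTTP: a context is started and a job jar and a data file are uploaded. As a rough, illustrative sketch only (host, application name and jar path are placeholders, not taken from this log), the same interactions can be reproduced against the standard spark-jobserver REST endpoints of a 0.6.x build:

import urllib.request

BASE = "http://localhost:8090"  # bind-address and port from the jobserver config dump above

# list the contexts the server currently knows about
with urllib.request.urlopen(BASE + "/contexts") as resp:
    print(resp.read().decode())

# upload a job jar under an application name (binary body, like `curl --data-binary @file`)
with open("myjobs.jar", "rb") as jar:  # placeholder path, not the KNIME jar from this log
    req = urllib.request.Request(BASE + "/jars/myapp", data=jar.read(), method="POST")
    print(urllib.request.urlopen(req).read().decode())

# create a named context; query parameters override the context-settings defaults
req = urllib.request.Request(BASE + "/contexts/myContext?num-cpu-cores=2&memory-per-node=1G",
                             data=b"", method="POST")
print(urllib.request.urlopen(req).read().decode())

Query parameters on the context-creation call map onto the context-settings block shown in the configuration dump above.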
[2018-03-31 12:26:39,786] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting context with actor name jobManager-f0-be63-de3d2f2bd38b
[2018-03-31 12:26:39,793] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Created working directory /var/log/spark-job-server/jobserver-knimeSparkContext6844118258657061122 for context knimeSparkContext
[2018-03-31 12:26:39,805] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting to execute sub process [./manager_start.sh, /var/log/spark-job-server/jobserver-knimeSparkContext6844118258657061122, akka.tcp://JobServer@127.0.0.1:36424]
[2018-03-31 12:26:42,773] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:36424] - Node [akka.tcp://JobServer@127.0.0.1:46822] is JOINING, roles [manager]
[2018-03-31 12:26:42,999] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:36424] - Leader is moving node [akka.tcp://JobServer@127.0.0.1:46822] to [Up]
[2018-03-31 12:26:43,139] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Received identify response, attempting to initialize context at akka.tcp://JobServer@127.0.0.1:46822/user/*
[2018-03-31 12:26:43,145] INFO k.jobserver.JobResultActor [] [akka://JobServer/user/context-supervisor/$a] - Starting actor spark.jobserver.JobResultActor
[2018-03-31 12:26:54,438] INFO AkkaClusterSupervisorActor [] [] - SparkContext knimeSparkContext joined
[2018-03-31 12:26:54,591] INFO spark.jobserver.JarManager [] [akka://JobServer/user/jar-manager] - Storing jar for app knimeJobs_013620532e91f948_c1d68d31852ba93bbe21b51883e769ded6594a8e_spark-1.6.cdh5_9, 398147 bytes
[2018-03-31 12:26:55,238] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [spark.jobserver.JobInfoActor$JobConfigStored$] from Actor[akka://JobServer/user/job-info#-114790873] to Actor[akka://JobServer/deadLetters] was not delivered. [2] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'.
[2018-03-31 12:26:56,861] INFO jobserver.DataManagerActor [] [akka://JobServer/user/data-manager] - Storing data in file prefix mysql-connector-java-5.1.39-bin.jar, 989497 bytes
[2018-03-31 12:26:57,028] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [spark.jobserver.JobInfoActor$JobConfigStored$] from Actor[akka://JobServer/user/job-info#-114790873] to Actor[akka://JobServer/deadLetters] was not delivered. [3] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'.
[2018-03-31 12:27:00,597] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [spark.jobserver.JobInfoActor$JobConfigStored$] from Actor[akka://JobServer/user/job-info#-114790873] to Actor[akka://JobServer/deadLetters] was not delivered. [4] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'.
[2018-03-31 12:27:06,241] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [spark.jobserver.JobInfoActor$JobConfigStored$] from Actor[akka://JobServer/user/job-info#-114790873] to Actor[akka://JobServer/deadLetters] was not delivered. [5] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'.
[2018-03-31 12:27:07,712] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [spark.jobserver.JobInfoActor$JobConfigStored$] from Actor[akka://JobServer/user/job-info#-114790873] to Actor[akka://JobServer/deadLetters] was not delivered. [6] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'.
[2018-03-31 12:27:09,042] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [spark.jobserver.JobInfoActor$JobConfigStored$] from Actor[akka://JobServer/user/job-info#-114790873] to Actor[akka://JobServer/deadLetters] was not delivered. [7] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'.
[2018-03-31 12:27:10,393] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [spark.jobserver.JobInfoActor$JobConfigStored$] from Actor[akka://JobServer/user/job-info#-114790873] to Actor[akka://JobServer/deadLetters] was not delivered. [8] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'.
[2018-03-31 12:27:11,719] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [spark.jobserver.JobInfoActor$JobConfigStored$] from Actor[akka://JobServer/user/job-info#-114790873] to Actor[akka://JobServer/deadLetters] was not delivered. [9] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'.
[2018-03-31 12:27:11,972] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [spark.jobserver.JobInfoActor$JobConfigStored$] from Actor[akka://JobServer/user/job-info#-114790873] to Actor[akka://JobServer/deadLetters] was not delivered. [10] dead letters encountered, no more dead letters will be logged. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'.
[2018-03-31 13:55:08,649] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Shutting down context knimeSparkContext
[2018-03-31 13:55:08,661] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:36424] - Marking node [akka.tcp://JobServer@127.0.0.1:46822] as [Down]
[2018-03-31 13:55:08,856] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Actor terminated: jobManager-f0-be63-de3d2f2bd38b
[2018-03-31 13:55:08,878] ERROR akka.remote.EndpointWriter [] [akka://JobServer/system/endpointManager/reliableEndpointWriter-akka.tcp%3A%2F%2FJobServer%40127.0.0.1%3A46822-0/endpointWriter] - AssociationError [akka.tcp://JobServer@127.0.0.1:36424] <- [akka.tcp://JobServer@127.0.0.1:46822]: Error [Shut down address: akka.tcp://JobServer@127.0.0.1:46822] [
akka.remote.ShutDownAssociation: Shut down address: akka.tcp://JobServer@127.0.0.1:46822
Caused by: akka.remote.transport.Transport$InvalidAssociationException: The remote system terminated the association because it is shutting down.
]
[2018-03-31 13:55:08,988] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:36424] - Leader is removing unreachable node [akka.tcp://JobServer@127.0.0.1:46822]
[2018-03-31 13:55:08,990] INFO Remoting [] [Remoting] - Address [akka.tcp://JobServer@127.0.0.1:46822] is now quarantined, all messages to this address will be delivered to dead letters.
BUILD-VERSION: Commit 0abe49552a6fcdf6a5aca565d624cda255342c33 on branch 0.6.2-KNIME_cdh-5.13 built on 2017-12-06 18:06:16+01:00
[2018-04-03 10:55:05,828] INFO spark.jobserver.JobServer$ [] [] - Starting JobServer with config { ... } (configuration dump omitted; identical to the one printed at the 2018-03-31 11:58 startup above)
[2018-04-03 10:55:05,831] INFO spark.jobserver.JobServer$ [] [] - Spray config: { ... } (Spray configuration dump omitted; identical to the one printed at the 2018-03-31 11:58 startup above)
[2018-04-03 10:55:06,403] INFO ka.event.slf4j.Slf4jLogger [] [] - Slf4jLogger started
[2018-04-03 10:55:06,581] INFO Remoting [] [Remoting] - Starting remoting
[2018-04-03 10:55:07,152] INFO Remoting [] [Remoting] - Remoting started; listening on addresses :[akka.tcp://JobServer@127.0.0.1:40295]
[2018-04-03 10:55:07,154] INFO Remoting [] [Remoting] - Remoting now listens on addresses: [akka.tcp://JobServer@127.0.0.1:40295]
[2018-04-03 10:55:07,205] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:40295] - Starting up...
[2018-04-03 10:55:07,464] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:40295] - Registered cluster JMX MBean [akka:type=Cluster]
[2018-04-03 10:55:07,464] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:40295] - Started up successfully
[2018-04-03 10:55:07,480] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:40295] - No seed-nodes configured, manual cluster join required
[2018-04-03 10:55:09,341] INFO ark.jobserver.io.JobSqlDAO [] [] - rootDir is /tmp/spark-job-server/sqldao/data
[2018-04-03 10:55:09,522] INFO ternal.util.VersionPrinter [] [] - Flyway 3.2.1 by Boxfuse
[2018-04-03 10:55:10,352] INFO dbsupport.DbSupportFactory [] [] - Database: jdbc:h2:/tmp/spark-job-server/sqldao/data/h2-db (H2 1.3)
[2018-04-03 10:55:11,542] INFO nternal.command.DbValidate [] [] - Validated 1 migration (execution time 00:00.933s)
[2018-04-03 10:55:11,585] INFO atatable.MetaDataTableImpl [] [] - Creating Metadata table: "PUBLIC"."schema_version"
[2018-04-03 10:55:11,613] INFO internal.command.DbMigrate [] [] - Current version of schema "PUBLIC": << Empty Schema >>
[2018-04-03 10:55:11,613] INFO internal.command.DbMigrate [] [] - Migrating schema "PUBLIC" to version 0.7.0 - init tables
[2018-04-03 10:55:11,664] INFO internal.command.DbMigrate [] [] - Successfully applied 1 migration to schema "PUBLIC" (execution time 00:00.089s).
[2018-04-03 10:55:11,777] INFO ark.jobserver.io.JobSqlDAO [] [] - Running dummy query to initialize dbcp.
[2018-04-03 10:55:12,600] INFO k.jobserver.io.JobDAOActor [] [akka://JobServer/user/dao-manager] - Starting actor spark.jobserver.io.JobDAOActor
[2018-04-03 10:55:12,654] INFO jobserver.DataManagerActor [] [akka://JobServer/user/data-manager] - Starting actor spark.jobserver.DataManagerActor
[2018-04-03 10:55:12,664] INFO spark.jobserver.JarManager [] [akka://JobServer/user/jar-manager] - Starting actor spark.jobserver.JarManager
[2018-04-03 10:55:12,669] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting actor spark.jobserver.AkkaClusterSupervisorActor
[2018-04-03 10:55:12,689] INFO ark.jobserver.JobInfoActor [] [akka://JobServer/user/job-info] - Starting actor spark.jobserver.JobInfoActor
[2018-04-03 10:55:12,835] INFO AkkaClusterSupervisorActor [] [] - AkkaClusterSupervisor initialized on akka.tcp://JobServer@127.0.0.1:40295
[2018-04-03 10:55:12,835] INFO k.jobserver.JobResultActor [] [akka://JobServer/user/context-supervisor/global-result-actor] - Starting actor spark.jobserver.JobResultActor
[2018-04-03 10:55:12,944] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:40295] - Node [akka.tcp://JobServer@127.0.0.1:40295] is JOINING, roles [supervisor]
[2018-04-03 10:55:13,169] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:40295] - Leader is moving node [akka.tcp://JobServer@127.0.0.1:40295] to [Up]
[2018-04-03 10:55:13,620] INFO spark.jobserver.WebApi [] [] - No authentication.
[2018-04-03 10:55:14,131] INFO spark.jobserver.WebApi [] [] - Starting browser web service...
[2018-04-03 10:55:16,836] INFO ay.can.server.HttpListener [] [akka://JobServer/user/IO-HTTP/listener-0] - Bound to /0.0.0.0:8090
[2018-04-03 10:55:16,836] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [akka.io.Tcp$Bound] from Actor[akka://JobServer/user/IO-HTTP/listener-0#-1731570991] to Actor[akka://JobServer/deadLetters] was not delivered. [1] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'.
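The context start at 11:03 below fails because the executor memory requested for it exceeds what YARN will allocate. The 1G memory-per-node default from the configuration dump can be overridden per context, and the 6144 MB in the error suggests a larger value was requested here. A hedged sketch of such an override on the context-creation call (the 6G figure is an assumption inferred from the error, not read from any request in this log):

import urllib.error
import urllib.request

# Illustrative only: ask for a context whose executors use more memory than the 1G default.
url = ("http://localhost:8090/contexts/knimeSparkContext"
       "?memory-per-node=6G&num-cpu-cores=2")
req = urllib.request.Request(url, data=b"", method="POST")
try:
    print(urllib.request.urlopen(req).read().decode())
except urllib.error.HTTPError as err:
    # a context that fails to initialize is reported back to the caller as an HTTP error
    print(err.code, err.read().decode())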
[2018-04-03 11:03:00,670] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting context with actor name jobManager-06-86de-328fc9c28e02
[2018-04-03 11:03:00,676] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Created working directory /var/log/spark-job-server/jobserver-knimeSparkContext7522280126866658006 for context knimeSparkContext
[2018-04-03 11:03:00,687] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting to execute sub process [./manager_start.sh, /var/log/spark-job-server/jobserver-knimeSparkContext7522280126866658006, akka.tcp://JobServer@127.0.0.1:40295]
[2018-04-03 11:03:02,900] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:40295] - Node [akka.tcp://JobServer@127.0.0.1:33448] is JOINING, roles [manager]
[2018-04-03 11:03:03,061] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:40295] - Leader is moving node [akka.tcp://JobServer@127.0.0.1:33448] to [Up]
[2018-04-03 11:03:03,175] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Received identify response, attempting to initialize context at akka.tcp://JobServer@127.0.0.1:33448/user/*
[2018-04-03 11:03:03,177] INFO k.jobserver.JobResultActor [] [akka://JobServer/user/context-supervisor/$a] - Starting actor spark.jobserver.JobResultActor
[2018-04-03 11:03:07,125] INFO AkkaClusterSupervisorActor [] [] - Failed to initialize context Actor[akka.tcp://JobServer@127.0.0.1:33448/user/jobManager-06-86de-328fc9c28e02#1123169564]
java.lang.IllegalArgumentException: Required executor memory (6144+614 MB) is above the max threshold (5120 MB) of this cluster! Please check the values of 'yarn.scheduler.maximum-allocation-mb' and/or 'yarn.nodemanager.resource.memory-mb'.
at org.apache.spark.deploy.yarn.Client.verifyClusterResources(Client.scala:281)
at org.apache.spark.deploy.yarn.Client.submitApplication(Client.scala:140)
at org.apache.spark.scheduler.cluster.YarnClientSchedulerBackend.start(YarnClientSchedulerBackend.scala:57)
at org.apache.spark.scheduler.TaskSchedulerImpl.start(TaskSchedulerImpl.scala:151)
at org.apache.spark.SparkContext.<init>(SparkContext.scala:538)
at spark.jobserver.context.DefaultSparkContextFactory$$anon$1.<init>(SparkContextFactory.scala:53)
at spark.jobserver.context.DefaultSparkContextFactory.makeContext(SparkContextFactory.scala:53)
at spark.jobserver.context.DefaultSparkContextFactory.makeContext(SparkContextFactory.scala:48)
at spark.jobserver.context.SparkContextFactory$class.makeContext(SparkContextFactory.scala:37)
at spark.jobserver.context.DefaultSparkContextFactory.makeContext(SparkContextFactory.scala:48)
at spark.jobserver.JobManagerActor.createContextFromConfig(JobManagerActor.scala:386)
at spark.jobserver.JobManagerActor$$anonfun$wrappedReceive$1.applyOrElse(JobManagerActor.scala:129)
at scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33)
at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33)
at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25)
at ooyala.common.akka.ActorStack$$anonfun$receive$1.applyOrElse(ActorStack.scala:33)
at scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33)
at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33)
at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25)
at ooyala.common.akka.Slf4jLogging$$anonfun$receive$1$$anonfun$applyOrElse$1.apply$mcV$sp(Slf4jLogging.scala:26)
at ooyala.common.akka.Slf4jLogging$class.ooyala$common$akka$Slf4jLogging$$withAkkaSourceLogging(Slf4jLogging.scala:35)
at ooyala.common.akka.Slf4jLogging$$anonfun$receive$1.applyOrElse(Slf4jLogging.scala:25)
at scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33)
at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33)
at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25)
at ooyala.common.akka.ActorMetrics$$anonfun$receive$1.applyOrElse(ActorMetrics.scala:24)
at akka.actor.ActorCell.receiveMessage(ActorCell.scala:498)
at akka.actor.ActorCell.invoke(ActorCell.scala:456)
at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:237)
at akka.dispatch.Mailbox.run(Mailbox.scala:219)
at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:386)
at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
[2018-04-03 11:03:07,138] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:40295] - Marking node [akka.tcp://JobServer@127.0.0.1:33448] as [Down]
[2018-04-03 11:03:07,331] INFO akka.actor.LocalActorRef [] [akka://JobServer/system/transports/akkaprotocolmanager.tcp0/akkaProtocol-tcp%3A%2F%2FJobServer%40127.0.0.1%3A38304-1] - Message [akka.remote.transport.AssociationHandle$Disassociated] from Actor[akka://JobServer/deadLetters] to Actor[akka://JobServer/system/transports/akkaprotocolmanager.tcp0/akkaProtocol-tcp%3A%2F%2FJobServer%40127.0.0.1%3A38304-1#817051347] was not delivered. [2] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'.
[2018-04-03 11:03:07,349] ERROR akka.remote.EndpointWriter [] [akka://JobServer/system/endpointManager/reliableEndpointWriter-akka.tcp%3A%2F%2FJobServer%40127.0.0.1%3A33448-0/endpointWriter] - AssociationError [akka.tcp://JobServer@127.0.0.1:40295] <- [akka.tcp://JobServer@127.0.0.1:33448]: Error [Shut down address: akka.tcp://JobServer@127.0.0.1:33448] [
akka.remote.ShutDownAssociation: Shut down address: akka.tcp://JobServer@127.0.0.1:33448
Caused by: akka.remote.transport.Transport$InvalidAssociationException: The remote system terminated the association because it is shutting down.
]
[2018-04-03 11:03:07,354] INFO akka.actor.LocalActorRef [] [akka://JobServer/system/transports/akkaprotocolmanager.tcp0/akkaProtocol-tcp%3A%2F%2FJobServer%40127.0.0.1%3A38304-1] - Message [akka.remote.transport.ActorTransportAdapter$DisassociateUnderlying] from Actor[akka://JobServer/deadLetters] to Actor[akka://JobServer/system/transports/akkaprotocolmanager.tcp0/akkaProtocol-tcp%3A%2F%2FJobServer%40127.0.0.1%3A38304-1#817051347] was not delivered. [3] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'.
[2018-04-03 11:03:07,355] INFO akka.actor.LocalActorRef [] [akka://JobServer/system/endpointManager/reliableEndpointWriter-akka.tcp%3A%2F%2FJobServer%40127.0.0.1%3A33448-0/endpointWriter] - Message [akka.actor.FSM$Timer] from Actor[akka://JobServer/deadLetters] to Actor[akka://JobServer/system/endpointManager/reliableEndpointWriter-akka.tcp%3A%2F%2FJobServer%40127.0.0.1%3A33448-0/endpointWriter#-769986837] was not delivered. [4] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'.
[2018-04-03 11:03:07,946] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [akka.cluster.ClusterHeartbeatReceiver$EndHeartbeat] from Actor[akka://JobServer/system/cluster/core/daemon/heartbeatSender#758611422] to Actor[akka://JobServer/deadLetters] was not delivered. [5] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'.
[2018-04-03 11:03:08,046] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:40295] - Leader is removing unreachable node [akka.tcp://JobServer@127.0.0.1:33448]
[2018-04-03 11:03:08,047] INFO Remoting [] [Remoting] - Address [akka.tcp://JobServer@127.0.0.1:33448] is now quarantined, all messages to this address will be delivered to dead letters.
[2018-04-03 11:03:08,945] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [akka.cluster.ClusterHeartbeatReceiver$EndHeartbeat] from Actor[akka://JobServer/system/cluster/core/daemon/heartbeatSender#758611422] to Actor[akka://JobServer/deadLetters] was not delivered. [6] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'.
[2018-04-03 11:03:09,946] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [akka.cluster.ClusterHeartbeatReceiver$EndHeartbeat] from Actor[akka://JobServer/system/cluster/core/daemon/heartbeatSender#758611422] to Actor[akka://JobServer/deadLetters] was not delivered. [7] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. [2018-04-03 11:03:10,946] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [akka.cluster.ClusterHeartbeatReceiver$EndHeartbeat] from Actor[akka://JobServer/system/cluster/core/daemon/heartbeatSender#758611422] to Actor[akka://JobServer/deadLetters] was not delivered. [8] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. [2018-04-03 11:03:11,936] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [akka.cluster.ClusterHeartbeatReceiver$EndHeartbeat] from Actor[akka://JobServer/system/cluster/core/daemon/heartbeatSender#758611422] to Actor[akka://JobServer/deadLetters] was not delivered. [9] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. [2018-04-03 11:03:12,946] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [akka.cluster.ClusterHeartbeatReceiver$EndHeartbeat] from Actor[akka://JobServer/system/cluster/core/daemon/heartbeatSender#758611422] to Actor[akka://JobServer/deadLetters] was not delivered. [10] dead letters encountered, no more dead letters will be logged. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. 
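Both this failure and the retries below (11:04:37 with 5120+512 MB, and again at 11:58:33) are the same YARN admission check: the requested executor memory plus the YARN memory overhead must stay below yarn.scheduler.maximum-allocation-mb, which is 5120 MB on this cluster. In Spark 1.6 that overhead defaults to 10% of the executor memory with a 384 MB floor, which is where the extra 614 MB (on 6144 MB) and 512 MB (on 5120 MB) come from; even the reduced 5 GB request needs 5632 MB in total. Since memory-per-node is only 1G in the config dumps, the 6 GB and 5 GB values were presumably passed as per-context overrides when the KNIME context was created. The fix is either to request less executor memory per context, or to raise yarn.scheduler.maximum-allocation-mb (and, if necessary, yarn.nodemanager.resource.memory-mb) on the YARN side, e.g. via Cloudera Manager on this CDH 5.13 cluster. A minimal sketch of the first option, assuming the usual spark { ... } layout of /opt/spark-job-server/environment.conf; the 4g figure is illustrative, chosen so that memory plus ~10% overhead stays under 5120 MB:

spark {
  context-settings {
    # Sets "spark.executor.memory"; keep this value plus the YARN overhead
    # (max(10% of it, 384 MB)) below yarn.scheduler.maximum-allocation-mb.
    memory-per-node = "4g"    # illustrative value, not taken from this log
  }
}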
[2018-04-03 11:04:31,889] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting context with actor name jobManager-6f-80e6-62ad5177ba55 [2018-04-03 11:04:31,890] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Created working directory /var/log/spark-job-server/jobserver-knimeSparkContext2121376575089627501 for context knimeSparkContext [2018-04-03 11:04:31,890] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting to execute sub process [./manager_start.sh, /var/log/spark-job-server/jobserver-knimeSparkContext2121376575089627501, akka.tcp://JobServer@127.0.0.1:40295] [2018-04-03 11:04:33,958] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:40295] - Node [akka.tcp://JobServer@127.0.0.1:46233] is JOINING, roles [manager] [2018-04-03 11:04:34,060] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:40295] - Leader is moving node [akka.tcp://JobServer@127.0.0.1:46233] to [Up] [2018-04-03 11:04:34,284] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Received identify response, attempting to initialize context at akka.tcp://JobServer@127.0.0.1:46233/user/* [2018-04-03 11:04:34,285] INFO k.jobserver.JobResultActor [] [akka://JobServer/user/context-supervisor/$b] - Starting actor spark.jobserver.JobResultActor [2018-04-03 11:04:37,741] INFO AkkaClusterSupervisorActor [] [] - Failed to initialize context Actor[akka.tcp://JobServer@127.0.0.1:46233/user/jobManager-6f-80e6-62ad5177ba55#-1382069963] java.lang.IllegalArgumentException: Required executor memory (5120+512 MB) is above the max threshold (5120 MB) of this cluster! Please check the values of 'yarn.scheduler.maximum-allocation-mb' and/or 'yarn.nodemanager.resource.memory-mb'. 
at org.apache.spark.deploy.yarn.Client.verifyClusterResources(Client.scala:281) at org.apache.spark.deploy.yarn.Client.submitApplication(Client.scala:140) at org.apache.spark.scheduler.cluster.YarnClientSchedulerBackend.start(YarnClientSchedulerBackend.scala:57) at org.apache.spark.scheduler.TaskSchedulerImpl.start(TaskSchedulerImpl.scala:151) at org.apache.spark.SparkContext.(SparkContext.scala:538) at spark.jobserver.context.DefaultSparkContextFactory$$anon$1.(SparkContextFactory.scala:53) at spark.jobserver.context.DefaultSparkContextFactory.makeContext(SparkContextFactory.scala:53) at spark.jobserver.context.DefaultSparkContextFactory.makeContext(SparkContextFactory.scala:48) at spark.jobserver.context.SparkContextFactory$class.makeContext(SparkContextFactory.scala:37) at spark.jobserver.context.DefaultSparkContextFactory.makeContext(SparkContextFactory.scala:48) at spark.jobserver.JobManagerActor.createContextFromConfig(JobManagerActor.scala:386) at spark.jobserver.JobManagerActor$$anonfun$wrappedReceive$1.applyOrElse(JobManagerActor.scala:129) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25) at ooyala.common.akka.ActorStack$$anonfun$receive$1.applyOrElse(ActorStack.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25) at ooyala.common.akka.Slf4jLogging$$anonfun$receive$1$$anonfun$applyOrElse$1.apply$mcV$sp(Slf4jLogging.scala:26) at ooyala.common.akka.Slf4jLogging$class.ooyala$common$akka$Slf4jLogging$$withAkkaSourceLogging(Slf4jLogging.scala:35) at ooyala.common.akka.Slf4jLogging$$anonfun$receive$1.applyOrElse(Slf4jLogging.scala:25) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25) at ooyala.common.akka.ActorMetrics$$anonfun$receive$1.applyOrElse(ActorMetrics.scala:24) at akka.actor.ActorCell.receiveMessage(ActorCell.scala:498) at akka.actor.ActorCell.invoke(ActorCell.scala:456) at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:237) at akka.dispatch.Mailbox.run(Mailbox.scala:219) at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:386) at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260) at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339) at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979) at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107) [2018-04-03 11:04:37,742] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:40295] - Marking node [akka.tcp://JobServer@127.0.0.1:46233] as [Down] [2018-04-03 11:04:37,975] ERROR akka.remote.EndpointWriter [] [akka://JobServer/system/endpointManager/reliableEndpointWriter-akka.tcp%3A%2F%2FJobServer%40127.0.0.1%3A46233-1/endpointWriter] - AssociationError [akka.tcp://JobServer@127.0.0.1:40295] <- [akka.tcp://JobServer@127.0.0.1:46233]: Error [Shut down address: 
akka.tcp://JobServer@127.0.0.1:46233] [ akka.remote.ShutDownAssociation: Shut down address: akka.tcp://JobServer@127.0.0.1:46233 Caused by: akka.remote.transport.Transport$InvalidAssociationException: The remote system terminated the association because it is shutting down. ] [2018-04-03 11:04:38,056] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:40295] - Leader is removing unreachable node [akka.tcp://JobServer@127.0.0.1:46233] [2018-04-03 11:04:38,056] INFO Remoting [] [Remoting] - Address [akka.tcp://JobServer@127.0.0.1:46233] is now quarantined, all messages to this address will be delivered to dead letters. [2018-04-03 11:05:43,087] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting context with actor name jobManager-7b-a739-57789a4fbd6e [2018-04-03 11:05:43,088] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Created working directory /var/log/spark-job-server/jobserver-knimeSparkContext3353236200469702405 for context knimeSparkContext [2018-04-03 11:05:43,088] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting to execute sub process [./manager_start.sh, /var/log/spark-job-server/jobserver-knimeSparkContext3353236200469702405, akka.tcp://JobServer@127.0.0.1:40295] [2018-04-03 11:05:45,201] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:40295] - Node [akka.tcp://JobServer@127.0.0.1:44086] is JOINING, roles [manager] [2018-04-03 11:05:46,057] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:40295] - Leader is moving node [akka.tcp://JobServer@127.0.0.1:44086] to [Up] [2018-04-03 11:05:46,124] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Received identify response, attempting to initialize context at akka.tcp://JobServer@127.0.0.1:44086/user/* [2018-04-03 11:05:46,125] INFO k.jobserver.JobResultActor [] [akka://JobServer/user/context-supervisor/$c] - Starting actor spark.jobserver.JobResultActor [2018-04-03 11:06:18,353] INFO AkkaClusterSupervisorActor [] [] - SparkContext knimeSparkContext joined [2018-04-03 11:06:18,459] INFO spark.jobserver.JarManager [] [akka://JobServer/user/jar-manager] - Storing jar for app knimeJobs_013620532e91f948_c1d68d31852ba93bbe21b51883e769ded6594a8e_spark-1.6.cdh5_9, 398149 bytes [2018-04-03 11:12:56,922] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Actor terminated: jobManager-7b-a739-57789a4fbd6e [2018-04-03 11:12:56,924] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:40295] - Marking node [akka.tcp://JobServer@127.0.0.1:44086] as [Down] [2018-04-03 11:12:56,947] ERROR akka.remote.EndpointWriter [] [akka://JobServer/system/endpointManager/reliableEndpointWriter-akka.tcp%3A%2F%2FJobServer%40127.0.0.1%3A44086-2/endpointWriter] - AssociationError [akka.tcp://JobServer@127.0.0.1:40295] <- [akka.tcp://JobServer@127.0.0.1:44086]: Error [Shut down address: akka.tcp://JobServer@127.0.0.1:44086] [ akka.remote.ShutDownAssociation: Shut down address: akka.tcp://JobServer@127.0.0.1:44086 Caused by: akka.remote.transport.Transport$InvalidAssociationException: The remote system terminated the association because it is shutting down. 
] BUILD-VERSION: Commit 0abe49552a6fcdf6a5aca565d624cda255342c33 on branch 0.6.2-KNIME_cdh-5.13 built on 2017-12-06 18:06:16+01:00 [2018-04-03 11:18:23,516] INFO spark.jobserver.JobServer$ [] [] - Starting JobServer with config { # system properties "ui" : { # system properties "enabled" : "true", # system properties "killEnabled" : "true" }, # system properties "app" : { # system properties "name" : "spark.jobserver.JobServer" }, # application.conf: 5 # spark web UI port "webUrlPort" : 8080, # system properties "submit" : { # system properties "deployMode" : "client" }, # system properties "serializer" : "org.apache.spark.serializer.KryoSerializer", # system properties "lineage" : { # system properties "enabled" : "false", # system properties "log" : { # system properties "dir" : "/var/log/spark/lineage" } }, # system properties "executor" : { # system properties "extraLibraryPath" : "/usr/lib/hadoop/lib/native", # system properties "uri" : "", # system properties "extraJavaOptions" : "-Dlog4j.configuration=file:/opt/spark-job-server/log4j-server.properties -DLOG_DIR=/var/log/spark-job-server" }, # merge of /opt/spark-job-server/environment.conf: 25,application.conf: 7 # ######################################################################### # Job server settings # ######################################################################### "jobserver" : { # /opt/spark-job-server/environment.conf: 27 # TCP port that the job server listens on for HTTP requests "port" : 8090, # merge of /opt/spark-job-server/environment.conf: 37,application.conf: 40 "sqldao" : { # /opt/spark-job-server/environment.conf: 38 "rootdir" : "/tmp/spark-job-server/sqldao/data", # application.conf: 42 # Slick database driver, full classpath "slick-driver" : "scala.slick.driver.H2Driver", # /opt/spark-job-server/environment.conf: 41 "dbcp" : { # /opt/spark-job-server/environment.conf: 45 "maxidle" : 10, # /opt/spark-job-server/environment.conf: 43 "maxactive" : 20, # /opt/spark-job-server/environment.conf: 42 "connectonstart" : true, # /opt/spark-job-server/environment.conf: 44 "minidle" : 1, # /opt/spark-job-server/environment.conf: 46 "initialsize" : 10 }, # application.conf: 45 # JDBC driver, full classpath "jdbc-driver" : "org.h2.Driver", # merge of /opt/spark-job-server/environment.conf: 39,application.conf: 52 # Full JDBC URL / init string, along with username and password. Sorry, needs to match above. # Substitutions may be used to launch job-server, but leave it out here in the default or tests won't pass "jdbc" : { # application.conf: 55 "password" : "", # application.conf: 54 "user" : "", # /opt/spark-job-server/environment.conf: 39 "url" : "jdbc:h2:/tmp/spark-job-server/sqldao/data/h2-db;AUTO_SERVER=TRUE;AUTO_RECONNECT=TRUE" } }, # application.conf: 87 # spark broadcst factory in yarn deployment # Versions prior to 1.1.0, spark default broadcast factory is org.apache.spark.broadcast.HttpBroadcastFactory. # Can't start multiple sparkContexts in the same JVM with HttpBroadcastFactory. "yarn-broadcast-factory" : "org.apache.spark.broadcast.TorrentBroadcastFactory", # /opt/spark-job-server/environment.conf: 52 # Storage directory for files that are uploaded to the server # via POST/data commands "datadao" : { # /opt/spark-job-server/environment.conf: 53 "rootdir" : "/tmp/spark-job-server/upload" }, # application.conf: 9 "bind-address" : "0.0.0.0", # /opt/spark-job-server/environment.conf: 65 # Timeouts for Spark context creation in seconds. 
In "yarn-client" mode, use # the first option, in all other modes, use the second one. If context # creation takes longer than the timeout, the jobserver closes the context. # Especially in yarn-client mode, context creation includes container # allocation, which can take a while. "yarn-context-creation-timeout" : "60 s", # /opt/spark-job-server/environment.conf: 66 "context-creation-timeout" : "60 s", # application.conf: 69 # The ask pattern timeout for Api "short-timeout" : "3 s", # /opt/spark-job-server/environment.conf: 58 # Number of jobs that can be run simultaneously per context. If not set, # defaults to number of cores on machine where Spark job server is running. "max-jobs-per-context" : 100, # /opt/spark-job-server/environment.conf: 36 # Class to use to persist data such as jars, applications, jobs, etc. # spark.jobserver.io.JobFileDAO uses the file system for persistence # spark.jobserver.io.JobSqlDAO uses an SQL database for persistence # "jobdao" : "spark.jobserver.io.JobSqlDAO", # /opt/spark-job-server/environment.conf: 30 # Directory where the job server stores uploaded jar files "jar-store-rootdir" : "/tmp/spark-job-server/jars", # application.conf: 24 "filedao" : { # application.conf: 25 "rootdir" : "/tmp/spark-jobserver/filedao/data" }, # application.conf: 15 # Number of job results to keep per JobResultActor/context "job-result-cache-size" : 5000, # /opt/spark-job-server/environment.conf: 71 # If true, a separate JVM is forked for each Spark context. # KERBEROS NOTE: In a Kerberos-enabled environment, you should set this to true # (as well as shiro authentication, see later in this file). "context-per-jvm" : true, # application.conf: 75 # Time out for job server to wait while creating named objects "named-object-creation-timeout" : "60 s" }, # merge of /opt/spark-job-server/environment.conf: 90,application.conf: 103 # Default settings for Spark contexts. These settings can be overridden on a # per-context basis. Please consult the Spark documentation for more details # on available settings. # Default settings for ad hoc as well as manually created contexts # You can add any Spark config params here, for example, spark.mesos.coarse = true "context-settings" : { # application.conf: 111 "streaming" : { # application.conf: 116 # if true, stops gracefully by waiting for the processing of all received data to be completed "stopGracefully" : true, # application.conf: 120 # if true, stops the SparkContext with the StreamingContext. The underlying SparkContext will be # stopped regardless of whether the StreamingContext has been started. "stopSparkContext" : true, # application.conf: 113 # Default batch interval for Spark Streaming contexts in milliseconds "batch_interval" : 1000 }, # application.conf: 127 # Timeout for SupervisorActor to wait for forked (separate JVM) contexts to initialize "context-init-timeout" : "60s", # application.conf: 109 # A zero-arg class implementing spark.jobserver.context.SparkContextFactory # Determines the type of jobs that can run in a SparkContext "context-factory" : "spark.jobserver.context.DefaultSparkContextFactory", # application.conf: 129 "passthrough" : { # application.conf: 130 "spark" : { # application.conf: 130 "driver" : { # application.conf: 130 "allowMultipleContexts" : true } } }, # /opt/spark-job-server/environment.conf: 106 # Required setting, that sets "spark.executor.memory" in the SparkConf. Sets # the amount of memory to use per Spark executor process, in the same format # as JVM memory strings (e.g. 512m, 2g). 
# # Note: In "yarn-client" mode, Spark will request this amount of memory per YARN # container, plus some additional overhead. "memory-per-node" : "1G", # /opt/spark-job-server/environment.conf: 98 # Required setting, that sets "spark.cores.max" in the SparkConf. This sets # the maximum amount of CPU cores to request for the Spark on the cluster # (not from each machine). # # IMPORTANT: Note that although required by job-server, this setting only # has an effect in Standalone and Mesos clusters. "num-cpu-cores" : 2 }, # /opt/spark-job-server/environment.conf: 82 # Sets the "spark.master" property in the SparkConf. KNIME recommends # "yarn-client" for production use and "local[4]" for debugging purposes. # master = "local[4]" # Run Spark locally with 4 worker threads "master" : "yarn-client", # system properties "dynamicAllocation" : { # system properties "enabled" : "true", # system properties "minExecutors" : "0", # system properties "executorIdleTimeout" : "60", # system properties "schedulerBacklogTimeout" : "1" }, # system properties "authenticate" : { # system properties "enableSaslEncryption" : "false" }, # system properties "sql" : { # system properties "queryExecutionListeners" : "com.cloudera.spark.lineage.ClouderaNavigatorListener" }, # system properties "shuffle" : { # system properties "encryption" : { # system properties "enabled" : "false" }, # system properties "service" : { # system properties "port" : "7337", # system properties "enabled" : "true" } }, # system properties "eventLog" : { # system properties "enabled" : "true", # system properties "dir" : "hdfs://ip-172-31-34-104.us-west-2.compute.internal:8020/user/spark/applicationHistory" }, # system properties "driver" : { # system properties "extraLibraryPath" : "/usr/lib/hadoop/lib/native", # system properties "extraJavaOptions" : "-XX:+UseConcMarkSweepGC\n -verbose:gc -XX:+PrintGCTimeStamps -Xloggc:/opt/spark-job-server/gc.out\n -XX:MaxPermSize=512m\n -XX:+CMSClassUnloadingEnabled -XX:MaxDirectMemorySize=512M -XX:+HeapDumpOnOutOfMemoryError -Djava.net.preferIPv4Stack=true -Dlog4j.configuration=file:/opt/spark-job-server/log4j-server.properties -DLOG_DIR=/var/log/spark-job-server -Dspark.executor.uri= ", # system properties "memory" : "2G" }, # application.conf: 93 # predefined Spark contexts # Below is an example, but do not uncomment it. Everything defined here is carried over to # deploy-time configs, so they will be created in all environments. :( "contexts" : {}, # system properties "extraListeners" : "com.cloudera.spark.lineage.ClouderaNavigatorListener", # system properties "yarn" : { # system properties "am" : { # system properties "extraLibraryPath" : "/usr/lib/hadoop/lib/native" }, # system properties "jar" : "local:/usr/lib/spark/lib/spark-assembly.jar", # system properties "historyServer" : { # system properties "allowTracking" : "true", # system properties "address" : "http://ip-172-31-34-104.us-west-2.compute.internal:18088" } }, # system properties "jars" : "file:/opt/spark-job-server/spark-job-server.jar" } [2018-04-03 11:18:23,519] INFO spark.jobserver.JobServer$ [] [] - Spray config: { # reference.conf: 1478 # After a `Timedout` message has been sent to the timeout handler and the # request still hasn't been completed after the time period set here # the server will complete the request itself with an error response. # Set to `infinite` to disable timeout timeouts. 
"timeout-timeout" : "2 s", # application.conf: 187 # ssl engine provider protocols "enabledProtocols" : [ # application.conf: 187 "SSLv3", # application.conf: 187 "TLSv1" ], # application.conf: 190 "pipelining-limit" : 2, # reference.conf: 1575 # The time period within which a connection handler must have been # registered after the bind handler has received a `Connected` event. # Set to `infinite` to disable. "registration-timeout" : "1s", # /opt/spark-job-server/environment.conf: 154 # parsing { # max-content-length = 200m # } "request-timeout" : "60 s", # reference.conf: 1600 "back-pressure" : { # reference.conf: 1604 # The reciprocal rate of requested Acks per NoAcks. E.g. the default value # '10' means that every 10th write request is acknowledged. This affects the # number of writes each connection has to buffer even in absence of back-pressure. "noack-rate" : 10, # reference.conf: 1609 # The lower limit the write queue size has to shrink to before reads are resumed. # Use 'infinite' to disable the low-watermark so that reading is resumed instantly # after the next successful write. "reading-low-watermark" : "infinite" }, # reference.conf: 1570 # The time period within which the TCP unbinding process must be completed. # Set to `infinite` to disable. "unbind-timeout" : "1s", # reference.conf: 1500 # Enables/disables support for statistics collection and querying. # Even though stats keeping overhead is small, # for maximum performance switch off when not needed. "stats-support" : "on", # reference.conf: 1490 # The path of the actor to send `spray.http.Timedout` messages to. # If empty all `Timedout` messages will go to the "regular" request # handling actor. "timeout-handler" : "", # /opt/spark-job-server/environment.conf: 155 "idle-timeout" : "120 s", # /opt/spark-job-server/environment.conf: 156 "request-chunk-aggregation-limit" : "200m", # reference.conf: 1449 # Enables/disables SSL encryption. # If enabled the server uses the implicit `ServerSSLEngineProvider` member # of the `Bind` command to create `SSLEngine` instances for the underlying # IO connection. "ssl-encryption" : "off", # application.conf: 192 # for maximum performance (prevents StopReading / ResumeReading messages to the IOBridge) # Needed for HTTP/1.0 requests with missing Host headers "default-host-header" : "spray.io:8765", # reference.conf: 1504 # Enables/disables the addition of a `Remote-Address` header # holding the clients (remote) IP address. "remote-address-header" : "off", # reference.conf: 1584 # The time after which a connection is aborted (RST) after a parsing error # occurred. The timeout prevents a connection which is already known to be # erroneous from receiving evermore data even if all of the data will be ignored. # However, in case of a connection abortion the client usually doesn't properly # receive the error response. This timeout is a trade-off which allows the client # some time to finish its request and receive a proper error response before the # connection is forcibly closed to free resources. "parsing-error-abort-timeout" : "2s", # merge of application.conf: 195,reference.conf: 1750 # The (default) configuration of the HTTP message parser for the server and # the client. # IMPORTANT: These settings (i.e. children of `spray.can.parsing`) can't be directly # overridden in `application.conf` to change the parser settings for client and server # altogether (see https://github.com/spray/spray/issues/346). 
Instead, override the # concrete settings beneath `spray.can.server.parsing` and `spray.can.client.parsing` # where these settings are copied to. "parsing" : { # reference.conf: 1758 "max-chunk-ext-length" : 256, # reference.conf: 1759 "max-chunk-size" : "1m", # application.conf: 195 # Increase this in order to upload bigger job jars "max-content-length" : "30m", # reference.conf: 1756 "max-header-count" : 64, # reference.conf: 1773 # Sets the strictness mode for parsing request target URIs. # The following values are defined: # # `strict`: RFC3986-compliant URIs are required, # a 400 response is triggered on violations # # `relaxed`: all visible 7-Bit ASCII chars are allowed # # `relaxed-with-raw-query`: like `relaxed` but additionally # the URI query is not parsed, but delivered as one raw string # as the `key` value of a single Query structure element. # "uri-parsing-mode" : "strict", # reference.conf: 1753 "max-response-reason-length" : 64, # reference.conf: 1783 # Enables/disables the logging of warning messages in case an incoming # message (request or response) contains an HTTP header which cannot be # parsed into its high-level model class due to incompatible syntax. # Note that, independently of this settings, spray will accept messages # with such headers as long as the message as a whole would still be legal # under the HTTP specification even without this header. # If a header cannot be parsed into a high-level model instance it will be # provided as a `RawHeader`. "illegal-header-warnings" : "on", # reference.conf: 1755 "max-header-value-length" : "8k", # reference.conf: 1787 # limits for the number of different values per header type that the # header cache will hold "header-cache" : { # reference.conf: 1792 "If-Modified-Since" : 0, # reference.conf: 1794 "If-Range" : 0, # reference.conf: 1788 "default" : 12, # reference.conf: 1790 "Date" : 0, # reference.conf: 1789 "Content-MD5" : 0, # reference.conf: 1796 "User-Agent" : 32, # reference.conf: 1795 "If-Unmodified-Since" : 0, # reference.conf: 1793 "If-None-Match" : 0, # reference.conf: 1791 "If-Match" : 0 }, # reference.conf: 1752 # The limits for the various parts of the HTTP message parser. "max-uri-length" : "2k", # reference.conf: 1754 "max-header-name-length" : 64, # reference.conf: 1807 # Enables/disables inclusion of an SSL-Session-Info header in parsed # messages over SSL transports (i.e., HttpRequest on server side and # HttpResponse on client side). "ssl-session-info-header" : "off", # reference.conf: 1802 # Sets the size starting from which incoming http-messages will be delivered # in chunks regardless of whether chunking is actually used on the wire. # Set to infinite to disable auto chunking. "incoming-auto-chunking-threshold-size" : "infinite" }, # reference.conf: 1566 # The time period within which the TCP binding process must be completed. # Set to `infinite` to disable. "bind-timeout" : "1s", # reference.conf: 1536 # Enables/disables the returning of more detailed error messages to # the client in the error response. # Should be disabled for browser-facing APIs due to the risk of XSS attacks # and (probably) enabled for internal or non-browser APIs. # Note that spray will always produce log messages containing the full # error details. "verbose-error-messages" : "off", # reference.conf: 1516 # Enables/disables automatic handling of HEAD requests. 
# If this setting is enabled the server dispatches HEAD requests as GET # requests to the application and automatically strips off all message # bodies from outgoing responses. # Note that, even when this setting is off the server will never send # out message bodies on responses to HEAD requests. "transparent-head-requests" : "on", # application.conf: 183 "keystoreType" : "JKS", # reference.conf: 1541 # Enables/disables the logging of the full (potentially multiple line) # error message to the server logs. # If disabled only a single line will be logged. "verbose-error-logging" : "off", # reference.conf: 1598 # Enables/disables automatic back-pressure handling by write buffering and # receive throttling "automatic-back-pressure-handling" : "on", # reference.conf: 1443-2058 # Always contains the deployed version of spray. # Referenced, for example, from the `spray.can.server.server-header` setting. "server-header" : "spray-can/1.2.3", # application.conf: 185 # key manager factory provider "provider" : "SunX509", # reference.conf: 1613 # Enables more verbose DEBUG logging for debugging SSL related issues. "ssl-tracing" : "off", # reference.conf: 1562 # For HTTPS connections this setting specified the maximum number of # bytes that are encrypted in one go. Large responses are broken down in # chunks of this size so as to already begin sending before the response has # been encrypted entirely. "max-encryption-chunk-size" : "1m", # reference.conf: 1508 # Enables/disables the addition of a `Raw-Request-URI` header holding the # original raw request URI as the client has sent it. "raw-request-uri-header" : "off", # application.conf: 182 # see http://docs.oracle.com/javase/7/docs/technotes/guides/security/StandardNames.html#SSLContext for more examples # typical are either SSL or TLS "encryptionType" : "SSL", # reference.conf: 1485 # The period during which a service must respond to a `ChunkedRequestStart` message # with a `RegisterChunkHandler` message. During the registration period reading from # the network is suspended. It is still possible that some chunks have already been # received which will be buffered until the registration is received or the timeout is # triggered. If the timeout is triggered the connection is immediately aborted. "chunkhandler-registration-timeout" : "500 ms", # reference.conf: 1556 # The initial size if the buffer to render the response headers in. # Can be used for fine-tuning response rendering performance but probably # doesn't have to be fiddled with in most applications. "response-header-size-hint" : 512, # reference.conf: 1528 # Enables/disables an alternative response streaming mode that doesn't # use `Transfer-Encoding: chunked` but rather renders the individual # MessageChunks coming in from the application as parts of the original # response entity. # Enabling this mode causes all connections to be closed after a streaming # response has been finished since there is no other way to signal the # response end to the client. # Note that chunkless-streaming is implicitly enabled when streaming # responses to HTTP/1.0 clients (since they don't support # `Transfer-Encoding: chunked`) "chunkless-streaming" : "off", # reference.conf: 1495 # The "granularity" of timeout checking for both idle connections timeouts # as well as request timeouts, should rarely be needed to modify. # If set to `infinite` request and connection timeout checking is disabled. 
"reaping-cycle" : "250 ms" } [2018-04-03 11:18:24,132] INFO ka.event.slf4j.Slf4jLogger [] [] - Slf4jLogger started [2018-04-03 11:18:24,278] INFO Remoting [] [Remoting] - Starting remoting [2018-04-03 11:18:24,551] INFO Remoting [] [Remoting] - Remoting started; listening on addresses :[akka.tcp://JobServer@127.0.0.1:45037] [2018-04-03 11:18:24,552] INFO Remoting [] [Remoting] - Remoting now listens on addresses: [akka.tcp://JobServer@127.0.0.1:45037] [2018-04-03 11:18:24,586] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:45037] - Starting up... [2018-04-03 11:18:24,730] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:45037] - Registered cluster JMX MBean [akka:type=Cluster] [2018-04-03 11:18:24,731] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:45037] - Started up successfully [2018-04-03 11:18:24,753] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:45037] - No seed-nodes configured, manual cluster join required [2018-04-03 11:18:25,785] INFO ark.jobserver.io.JobSqlDAO [] [] - rootDir is /tmp/spark-job-server/sqldao/data [2018-04-03 11:18:25,881] INFO ternal.util.VersionPrinter [] [] - Flyway 3.2.1 by Boxfuse [2018-04-03 11:18:26,580] INFO dbsupport.DbSupportFactory [] [] - Database: jdbc:h2:/tmp/spark-job-server/sqldao/data/h2-db (H2 1.3) [2018-04-03 11:18:26,800] INFO nternal.command.DbValidate [] [] - Validated 1 migration (execution time 00:00.063s) [2018-04-03 11:18:26,831] INFO atatable.MetaDataTableImpl [] [] - Creating Metadata table: "PUBLIC"."schema_version" [2018-04-03 11:18:26,864] INFO internal.command.DbMigrate [] [] - Current version of schema "PUBLIC": << Empty Schema >> [2018-04-03 11:18:26,864] INFO internal.command.DbMigrate [] [] - Migrating schema "PUBLIC" to version 0.7.0 - init tables [2018-04-03 11:18:26,916] INFO internal.command.DbMigrate [] [] - Successfully applied 1 migration to schema "PUBLIC" (execution time 00:00.101s). [2018-04-03 11:18:27,025] INFO ark.jobserver.io.JobSqlDAO [] [] - Running dummy query to initialize dbcp. 
[2018-04-03 11:18:28,039] INFO k.jobserver.io.JobDAOActor [] [akka://JobServer/user/dao-manager] - Starting actor spark.jobserver.io.JobDAOActor [2018-04-03 11:18:28,047] INFO jobserver.DataManagerActor [] [akka://JobServer/user/data-manager] - Starting actor spark.jobserver.DataManagerActor [2018-04-03 11:18:28,054] INFO spark.jobserver.JarManager [] [akka://JobServer/user/jar-manager] - Starting actor spark.jobserver.JarManager [2018-04-03 11:18:28,055] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting actor spark.jobserver.AkkaClusterSupervisorActor [2018-04-03 11:18:28,061] INFO ark.jobserver.JobInfoActor [] [akka://JobServer/user/job-info] - Starting actor spark.jobserver.JobInfoActor [2018-04-03 11:18:28,094] INFO AkkaClusterSupervisorActor [] [] - AkkaClusterSupervisor initialized on akka.tcp://JobServer@127.0.0.1:45037 [2018-04-03 11:18:28,094] INFO k.jobserver.JobResultActor [] [akka://JobServer/user/context-supervisor/global-result-actor] - Starting actor spark.jobserver.JobResultActor [2018-04-03 11:18:28,130] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:45037] - Node [akka.tcp://JobServer@127.0.0.1:45037] is JOINING, roles [supervisor] [2018-04-03 11:18:28,460] INFO spark.jobserver.WebApi [] [] - No authentication. [2018-04-03 11:18:28,786] INFO spark.jobserver.WebApi [] [] - Starting browser web service... [2018-04-03 11:18:28,800] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:45037] - Leader is moving node [akka.tcp://JobServer@127.0.0.1:45037] to [Up] [2018-04-03 11:18:29,570] INFO ay.can.server.HttpListener [] [akka://JobServer/user/IO-HTTP/listener-0] - Bound to /0.0.0.0:8090 [2018-04-03 11:28:17,841] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting context with actor name jobManager-c9-a848-27d6fe7b3004 [2018-04-03 11:28:17,846] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Created working directory /var/log/spark-job-server/jobserver-knimeSparkContext1990744388684811148 for context knimeSparkContext [2018-04-03 11:28:17,854] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting to execute sub process [./manager_start.sh, /var/log/spark-job-server/jobserver-knimeSparkContext1990744388684811148, akka.tcp://JobServer@127.0.0.1:45037] [2018-04-03 11:28:19,278] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:45037] - Node [akka.tcp://JobServer@127.0.0.1:43035] is JOINING, roles [manager] [2018-04-03 11:28:19,536] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:45037] - Leader is moving node [akka.tcp://JobServer@127.0.0.1:43035] to [Up] [2018-04-03 11:28:19,586] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Received identify response, attempting to initialize context at akka.tcp://JobServer@127.0.0.1:43035/user/* [2018-04-03 11:28:19,588] INFO k.jobserver.JobResultActor [] [akka://JobServer/user/context-supervisor/$a] - Starting actor spark.jobserver.JobResultActor [2018-04-03 11:29:19,606] INFO AkkaClusterSupervisorActor [] [] - Failed to send initialize message to context Actor[akka.tcp://JobServer@127.0.0.1:43035/user/jobManager-c9-a848-27d6fe7b3004#-2127115561] akka.pattern.AskTimeoutException: Timed out at 
akka.pattern.PromiseActorRef$$anonfun$1.apply$mcV$sp(AskSupport.scala:334) at akka.actor.Scheduler$$anon$11.run(Scheduler.scala:118) at scala.concurrent.Future$InternalCallbackExecutor$.scala$concurrent$Future$InternalCallbackExecutor$$unbatchedExecute(Future.scala:694) at scala.concurrent.Future$InternalCallbackExecutor$.execute(Future.scala:691) at akka.actor.LightArrayRevolverScheduler$TaskHolder.executeTask(Scheduler.scala:455) at akka.actor.LightArrayRevolverScheduler$$anon$12.executeBucket$1(Scheduler.scala:407) at akka.actor.LightArrayRevolverScheduler$$anon$12.nextTick(Scheduler.scala:411) at akka.actor.LightArrayRevolverScheduler$$anon$12.run(Scheduler.scala:363) at java.lang.Thread.run(Thread.java:745) [2018-04-03 11:29:19,608] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:45037] - Marking node [akka.tcp://JobServer@127.0.0.1:43035] as [Down] [2018-04-03 11:29:19,609] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [spark.jobserver.ContextSupervisor$ContextInitError] from Actor[akka://JobServer/user/context-supervisor#1659026161] to Actor[akka://JobServer/deadLetters] was not delivered. [1] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. [2018-04-03 11:29:20,085] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:45037] - Ignoring received gossip status from unreachable [UniqueAddress(akka.tcp://JobServer@127.0.0.1:43035,844574256)] [2018-04-03 11:29:20,536] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:45037] - Leader is removing unreachable node [akka.tcp://JobServer@127.0.0.1:43035] [2018-04-03 11:29:20,537] INFO Remoting [] [Remoting] - Address [akka.tcp://JobServer@127.0.0.1:43035] is now quarantined, all messages to this address will be delivered to dead letters. [2018-04-03 11:29:20,547] INFO akka.actor.LocalActorRef [] [akka://JobServer/system/endpointManager/reliableEndpointWriter-akka.tcp%3A%2F%2FJobServer%40127.0.0.1%3A43035-0/endpointWriter/endpointReader-akka.tcp%3A%2F%2FJobServer%40127.0.0.1%3A43035-0] - Message [akka.remote.transport.AssociationHandle$Disassociated] from Actor[akka://JobServer/deadLetters] to Actor[akka://JobServer/system/endpointManager/reliableEndpointWriter-akka.tcp%3A%2F%2FJobServer%40127.0.0.1%3A43035-0/endpointWriter/endpointReader-akka.tcp%3A%2F%2FJobServer%40127.0.0.1%3A43035-0#1456166729] was not delivered. [2] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. [2018-04-03 11:29:20,550] INFO akka.actor.LocalActorRef [] [akka://JobServer/system/transports/akkaprotocolmanager.tcp0/akkaProtocol-tcp%3A%2F%2FJobServer%40127.0.0.1%3A48132-1] - Message [akka.remote.transport.AssociationHandle$Disassociated] from Actor[akka://JobServer/deadLetters] to Actor[akka://JobServer/system/transports/akkaprotocolmanager.tcp0/akkaProtocol-tcp%3A%2F%2FJobServer%40127.0.0.1%3A48132-1#-10378254] was not delivered. [3] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. 
[2018-04-03 11:29:20,566] INFO akka.actor.LocalActorRef [] [akka://JobServer/system/transports/akkaprotocolmanager.tcp0/akkaProtocol-tcp%3A%2F%2FJobServer%40127.0.0.1%3A48254-2] - Message [akka.remote.transport.AssociationHandle$InboundPayload] from Actor[akka://JobServer/deadLetters] to Actor[akka://JobServer/system/transports/akkaprotocolmanager.tcp0/akkaProtocol-tcp%3A%2F%2FJobServer%40127.0.0.1%3A48254-2#-543672894] was not delivered. [4] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. [2018-04-03 11:29:20,947] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [akka.cluster.ClusterHeartbeatReceiver$EndHeartbeat] from Actor[akka://JobServer/system/cluster/core/daemon/heartbeatSender#974900599] to Actor[akka://JobServer/deadLetters] was not delivered. [5] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. [2018-04-03 11:29:21,946] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [akka.cluster.ClusterHeartbeatReceiver$EndHeartbeat] from Actor[akka://JobServer/system/cluster/core/daemon/heartbeatSender#974900599] to Actor[akka://JobServer/deadLetters] was not delivered. [6] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. [2018-04-03 11:29:22,946] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [akka.cluster.ClusterHeartbeatReceiver$EndHeartbeat] from Actor[akka://JobServer/system/cluster/core/daemon/heartbeatSender#974900599] to Actor[akka://JobServer/deadLetters] was not delivered. [7] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. [2018-04-03 11:29:23,946] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [akka.cluster.ClusterHeartbeatReceiver$EndHeartbeat] from Actor[akka://JobServer/system/cluster/core/daemon/heartbeatSender#974900599] to Actor[akka://JobServer/deadLetters] was not delivered. [8] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. [2018-04-03 11:29:24,946] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [akka.cluster.ClusterHeartbeatReceiver$EndHeartbeat] from Actor[akka://JobServer/system/cluster/core/daemon/heartbeatSender#974900599] to Actor[akka://JobServer/deadLetters] was not delivered. [9] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. [2018-04-03 11:29:25,946] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [akka.cluster.ClusterHeartbeatReceiver$EndHeartbeat] from Actor[akka://JobServer/system/cluster/core/daemon/heartbeatSender#974900599] to Actor[akka://JobServer/deadLetters] was not delivered. [10] dead letters encountered, no more dead letters will be logged. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. 
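This attempt fails differently: the forked jobManager JVM joined the cluster at 11:28:19, but the supervisor's initialize request timed out exactly 60 seconds later at 11:29:19, matching the yarn-context-creation-timeout / context-creation-timeout of "60 s" in the config dump above. In yarn-client mode that window has to cover YARN container allocation, so on a busy or resource-starved cluster it can be too short; it is also worth checking the YARN ResourceManager for pending container requests. A minimal sketch of raising the timeouts in /opt/spark-job-server/environment.conf, assuming the spark.jobserver block that currently sets them to "60 s"; the 300 s figure is illustrative, not taken from this log:

spark {
  jobserver {
    # Allow more time for YARN container allocation during context start-up.
    yarn-context-creation-timeout = "300 s"
    context-creation-timeout = "300 s"
  }
}

A restart of the spark-job-server service would presumably be needed for the change to take effect.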
[2018-04-03 11:36:59,684] ERROR akka.remote.EndpointWriter [] [akka://JobServer/system/endpointManager/reliableEndpointWriter-akka.tcp%3A%2F%2FJobServer%40127.0.0.1%3A43035-1/endpointWriter] - AssociationError [akka.tcp://JobServer@127.0.0.1:45037] <- [akka.tcp://JobServer@127.0.0.1:43035]: Error [Shut down address: akka.tcp://JobServer@127.0.0.1:43035] [ akka.remote.ShutDownAssociation: Shut down address: akka.tcp://JobServer@127.0.0.1:43035 Caused by: akka.remote.transport.Transport$InvalidAssociationException: The remote system terminated the association because it is shutting down. ] [2018-04-03 11:49:18,350] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting context with actor name jobManager-72-bb32-2620d33d40c1 [2018-04-03 11:49:18,351] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Created working directory /var/log/spark-job-server/jobserver-knimeSparkContext7647197462101421134 for context knimeSparkContext [2018-04-03 11:49:18,352] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting to execute sub process [./manager_start.sh, /var/log/spark-job-server/jobserver-knimeSparkContext7647197462101421134, akka.tcp://JobServer@127.0.0.1:45037] [2018-04-03 11:49:19,811] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:45037] - Node [akka.tcp://JobServer@127.0.0.1:39724] is JOINING, roles [manager] [2018-04-03 11:49:20,536] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:45037] - Leader is moving node [akka.tcp://JobServer@127.0.0.1:39724] to [Up] [2018-04-03 11:49:20,594] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Received identify response, attempting to initialize context at akka.tcp://JobServer@127.0.0.1:39724/user/* [2018-04-03 11:49:20,594] INFO k.jobserver.JobResultActor [] [akka://JobServer/user/context-supervisor/$b] - Starting actor spark.jobserver.JobResultActor [2018-04-03 11:49:52,105] INFO AkkaClusterSupervisorActor [] [] - SparkContext knimeSparkContext joined [2018-04-03 11:49:52,176] INFO spark.jobserver.JarManager [] [akka://JobServer/user/jar-manager] - Storing jar for app knimeJobs_013620532e91f948_c1d68d31852ba93bbe21b51883e769ded6594a8e_spark-1.6.cdh5_9, 398149 bytes [2018-04-03 11:50:22,620] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Actor terminated: jobManager-72-bb32-2620d33d40c1 [2018-04-03 11:50:22,622] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:45037] - Marking node [akka.tcp://JobServer@127.0.0.1:39724] as [Down] [2018-04-03 11:50:22,625] ERROR akka.remote.EndpointWriter [] [akka://JobServer/system/endpointManager/reliableEndpointWriter-akka.tcp%3A%2F%2FJobServer%40127.0.0.1%3A39724-2/endpointWriter] - AssociationError [akka.tcp://JobServer@127.0.0.1:45037] <- [akka.tcp://JobServer@127.0.0.1:39724]: Error [Shut down address: akka.tcp://JobServer@127.0.0.1:39724] [ akka.remote.ShutDownAssociation: Shut down address: akka.tcp://JobServer@127.0.0.1:39724 Caused by: akka.remote.transport.Transport$InvalidAssociationException: The remote system terminated the association because it is shutting down. 
] [2018-04-03 11:50:23,536] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:45037] - Leader is removing unreachable node [akka.tcp://JobServer@127.0.0.1:39724] [2018-04-03 11:50:23,536] INFO Remoting [] [Remoting] - Address [akka.tcp://JobServer@127.0.0.1:39724] is now quarantined, all messages to this address will be delivered to dead letters. [2018-04-03 11:50:53,113] INFO jobserver.DataManagerActor [] [akka://JobServer/user/data-manager] - Storing data in file prefix mysql-connector-java-5.1.39-bin.jar, 989497 bytes [2018-04-03 11:52:08,589] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting context with actor name jobManager-b9-bcf1-8ad53676f538 [2018-04-03 11:52:08,589] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Created working directory /var/log/spark-job-server/jobserver-knimeSparkContext5373686122981276088 for context knimeSparkContext [2018-04-03 11:52:08,590] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting to execute sub process [./manager_start.sh, /var/log/spark-job-server/jobserver-knimeSparkContext5373686122981276088, akka.tcp://JobServer@127.0.0.1:45037] [2018-04-03 11:52:10,068] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:45037] - Node [akka.tcp://JobServer@127.0.0.1:44108] is JOINING, roles [manager] [2018-04-03 11:52:10,536] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:45037] - Leader is moving node [akka.tcp://JobServer@127.0.0.1:44108] to [Up] [2018-04-03 11:52:10,560] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Received identify response, attempting to initialize context at akka.tcp://JobServer@127.0.0.1:44108/user/* [2018-04-03 11:52:10,560] INFO k.jobserver.JobResultActor [] [akka://JobServer/user/context-supervisor/$c] - Starting actor spark.jobserver.JobResultActor [2018-04-03 11:52:42,007] INFO AkkaClusterSupervisorActor [] [] - SparkContext knimeSparkContext joined [2018-04-03 11:52:55,484] INFO jobserver.DataManagerActor [] [akka://JobServer/user/data-manager] - Storing data in file prefix mysql-connector-java-5.1.39-bin.jar, 989497 bytes [2018-04-03 11:58:27,054] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Shutting down context knimeSparkContext [2018-04-03 11:58:27,057] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:45037] - Marking node [akka.tcp://JobServer@127.0.0.1:44108] as [Down] [2018-04-03 11:58:27,197] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Actor terminated: jobManager-b9-bcf1-8ad53676f538 [2018-04-03 11:58:27,206] ERROR akka.remote.EndpointWriter [] [akka://JobServer/system/endpointManager/reliableEndpointWriter-akka.tcp%3A%2F%2FJobServer%40127.0.0.1%3A44108-3/endpointWriter] - AssociationError [akka.tcp://JobServer@127.0.0.1:45037] <- [akka.tcp://JobServer@127.0.0.1:44108]: Error [Shut down address: akka.tcp://JobServer@127.0.0.1:44108] [ akka.remote.ShutDownAssociation: Shut down address: akka.tcp://JobServer@127.0.0.1:44108 Caused by: akka.remote.transport.Transport$InvalidAssociationException: The remote system terminated the association because it is shutting down. 
] [2018-04-03 11:58:27,536] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:45037] - Leader is removing unreachable node [akka.tcp://JobServer@127.0.0.1:44108] [2018-04-03 11:58:27,536] INFO Remoting [] [Remoting] - Address [akka.tcp://JobServer@127.0.0.1:44108] is now quarantined, all messages to this address will be delivered to dead letters. [2018-04-03 11:58:29,977] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting context with actor name jobManager-96-b130-7cafd2e416b3 [2018-04-03 11:58:29,978] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Created working directory /var/log/spark-job-server/jobserver-knimeSparkContext1229262153115617133 for context knimeSparkContext [2018-04-03 11:58:29,978] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting to execute sub process [./manager_start.sh, /var/log/spark-job-server/jobserver-knimeSparkContext1229262153115617133, akka.tcp://JobServer@127.0.0.1:45037] [2018-04-03 11:58:31,336] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:45037] - Node [akka.tcp://JobServer@127.0.0.1:44551] is JOINING, roles [manager] [2018-04-03 11:58:31,536] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:45037] - Leader is moving node [akka.tcp://JobServer@127.0.0.1:44551] to [Up] [2018-04-03 11:58:31,560] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Received identify response, attempting to initialize context at akka.tcp://JobServer@127.0.0.1:44551/user/* [2018-04-03 11:58:31,560] INFO k.jobserver.JobResultActor [] [akka://JobServer/user/context-supervisor/$d] - Starting actor spark.jobserver.JobResultActor [2018-04-03 11:58:33,952] INFO AkkaClusterSupervisorActor [] [] - Failed to initialize context Actor[akka.tcp://JobServer@127.0.0.1:44551/user/jobManager-96-b130-7cafd2e416b3#-2144677864] java.lang.IllegalArgumentException: Required executor memory (5120+512 MB) is above the max threshold (5120 MB) of this cluster! Please check the values of 'yarn.scheduler.maximum-allocation-mb' and/or 'yarn.nodemanager.resource.memory-mb'. 
at org.apache.spark.deploy.yarn.Client.verifyClusterResources(Client.scala:281) at org.apache.spark.deploy.yarn.Client.submitApplication(Client.scala:140) at org.apache.spark.scheduler.cluster.YarnClientSchedulerBackend.start(YarnClientSchedulerBackend.scala:57) at org.apache.spark.scheduler.TaskSchedulerImpl.start(TaskSchedulerImpl.scala:151) at org.apache.spark.SparkContext.(SparkContext.scala:538) at spark.jobserver.context.DefaultSparkContextFactory$$anon$1.(SparkContextFactory.scala:53) at spark.jobserver.context.DefaultSparkContextFactory.makeContext(SparkContextFactory.scala:53) at spark.jobserver.context.DefaultSparkContextFactory.makeContext(SparkContextFactory.scala:48) at spark.jobserver.context.SparkContextFactory$class.makeContext(SparkContextFactory.scala:37) at spark.jobserver.context.DefaultSparkContextFactory.makeContext(SparkContextFactory.scala:48) at spark.jobserver.JobManagerActor.createContextFromConfig(JobManagerActor.scala:386) at spark.jobserver.JobManagerActor$$anonfun$wrappedReceive$1.applyOrElse(JobManagerActor.scala:129) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25) at ooyala.common.akka.ActorStack$$anonfun$receive$1.applyOrElse(ActorStack.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25) at ooyala.common.akka.Slf4jLogging$$anonfun$receive$1$$anonfun$applyOrElse$1.apply$mcV$sp(Slf4jLogging.scala:26) at ooyala.common.akka.Slf4jLogging$class.ooyala$common$akka$Slf4jLogging$$withAkkaSourceLogging(Slf4jLogging.scala:35) at ooyala.common.akka.Slf4jLogging$$anonfun$receive$1.applyOrElse(Slf4jLogging.scala:25) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25) at ooyala.common.akka.ActorMetrics$$anonfun$receive$1.applyOrElse(ActorMetrics.scala:24) at akka.actor.ActorCell.receiveMessage(ActorCell.scala:498) at akka.actor.ActorCell.invoke(ActorCell.scala:456) at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:237) at akka.dispatch.Mailbox.run(Mailbox.scala:219) at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:386) at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260) at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339) at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979) at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107) [2018-04-03 11:58:33,953] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:45037] - Marking node [akka.tcp://JobServer@127.0.0.1:44551] as [Down] [2018-04-03 11:58:33,997] ERROR akka.remote.EndpointWriter [] [akka://JobServer/system/endpointManager/reliableEndpointWriter-akka.tcp%3A%2F%2FJobServer%40127.0.0.1%3A44551-4/endpointWriter] - AssociationError [akka.tcp://JobServer@127.0.0.1:45037] <- [akka.tcp://JobServer@127.0.0.1:44551]: Error [Shut down address: 
akka.tcp://JobServer@127.0.0.1:44551] [
akka.remote.ShutDownAssociation: Shut down address: akka.tcp://JobServer@127.0.0.1:44551
Caused by: akka.remote.transport.Transport$InvalidAssociationException: The remote system terminated the association because it is shutting down.
]
[2018-04-03 11:58:34,536] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:45037] - Leader is removing unreachable node [akka.tcp://JobServer@127.0.0.1:44551]
[2018-04-03 11:58:34,537] INFO Remoting [] [Remoting] - Address [akka.tcp://JobServer@127.0.0.1:44551] is now quarantined, all messages to this address will be delivered to dead letters.
[2018-04-03 11:58:45,369] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting context with actor name jobManager-86-8719-cac3760b13d4
[2018-04-03 11:58:45,369] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Created working directory /var/log/spark-job-server/jobserver-knimeSparkContext3717063637414680412 for context knimeSparkContext
[2018-04-03 11:58:45,370] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting to execute sub process [./manager_start.sh, /var/log/spark-job-server/jobserver-knimeSparkContext3717063637414680412, akka.tcp://JobServer@127.0.0.1:45037]
[2018-04-03 11:58:46,746] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:45037] - Node [akka.tcp://JobServer@127.0.0.1:38707] is JOINING, roles [manager]
[2018-04-03 11:58:47,536] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:45037] - Leader is moving node [akka.tcp://JobServer@127.0.0.1:38707] to [Up]
[2018-04-03 11:58:47,595] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Received identify response, attempting to initialize context at akka.tcp://JobServer@127.0.0.1:38707/user/*
[2018-04-03 11:58:47,595] INFO k.jobserver.JobResultActor [] [akka://JobServer/user/context-supervisor/$e] - Starting actor spark.jobserver.JobResultActor
[2018-04-03 11:58:48,979] INFO AkkaClusterSupervisorActor [] [] - Failed to initialize context Actor[akka.tcp://JobServer@127.0.0.1:38707/user/jobManager-86-8719-cac3760b13d4#1101949374]
java.lang.NumberFormatException: Size must be specified as bytes (b), kibibytes (k), mebibytes (m), gibibytes (g), tebibytes (t), or pebibytes(p). E.g. 50b, 100k, or 250m. Fractional values are not supported.
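The second attempt gets past the YARN check but dies while the new SparkContext parses its memory settings: a memory size was supplied as the fractional value 4.5 (echoed at the start of the stack trace below), and judging by the trace this is the executor-memory setting being read during SparkContext construction. Spark only accepts a whole number plus a unit suffix, so the same amount has to be written in a smaller unit. A minimal sketch, again assuming the value flows through memory-per-node (it may just as well sit in the per-context settings passed at creation time); whatever value is chosen still has to respect the 5120 MB YARN cap from the previous failure:

    # Sketch only: 4.5 GiB written as a whole number of mebibytes
    context-settings {
      memory-per-node = "4608m"   # 4.5 * 1024 = 4608; a bare "4.5" (or "4.5g") is rejected
    }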
Input was: 4.5
    at org.apache.spark.network.util.JavaUtils.parseByteString(JavaUtils.java:244)
    at org.apache.spark.network.util.JavaUtils.byteStringAsBytes(JavaUtils.java:255)
    at org.apache.spark.util.Utils$.memoryStringToMb(Utils.scala:1040)
    at org.apache.spark.SparkContext$$anonfun$19.apply(SparkContext.scala:506)
    at org.apache.spark.SparkContext$$anonfun$19.apply(SparkContext.scala:506)
    at scala.Option.map(Option.scala:145)
    at org.apache.spark.SparkContext.<init>(SparkContext.scala:506)
    at spark.jobserver.context.DefaultSparkContextFactory$$anon$1.<init>(SparkContextFactory.scala:53)
    at spark.jobserver.context.DefaultSparkContextFactory.makeContext(SparkContextFactory.scala:53)
    at spark.jobserver.context.DefaultSparkContextFactory.makeContext(SparkContextFactory.scala:48)
    at spark.jobserver.context.SparkContextFactory$class.makeContext(SparkContextFactory.scala:37)
    at spark.jobserver.context.DefaultSparkContextFactory.makeContext(SparkContextFactory.scala:48)
    at spark.jobserver.JobManagerActor.createContextFromConfig(JobManagerActor.scala:386)
    at spark.jobserver.JobManagerActor$$anonfun$wrappedReceive$1.applyOrElse(JobManagerActor.scala:129)
    at scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33)
    at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33)
    at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25)
    at ooyala.common.akka.ActorStack$$anonfun$receive$1.applyOrElse(ActorStack.scala:33)
    at scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33)
    at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33)
    at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25)
    at ooyala.common.akka.Slf4jLogging$$anonfun$receive$1$$anonfun$applyOrElse$1.apply$mcV$sp(Slf4jLogging.scala:26)
    at ooyala.common.akka.Slf4jLogging$class.ooyala$common$akka$Slf4jLogging$$withAkkaSourceLogging(Slf4jLogging.scala:35)
    at ooyala.common.akka.Slf4jLogging$$anonfun$receive$1.applyOrElse(Slf4jLogging.scala:25)
    at scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33)
    at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33)
    at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25)
    at ooyala.common.akka.ActorMetrics$$anonfun$receive$1.applyOrElse(ActorMetrics.scala:24)
    at akka.actor.ActorCell.receiveMessage(ActorCell.scala:498)
    at akka.actor.ActorCell.invoke(ActorCell.scala:456)
    at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:237)
    at akka.dispatch.Mailbox.run(Mailbox.scala:219)
    at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:386)
    at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
    at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
    at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
    at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
[2018-04-03 11:58:48,980] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:45037] - Marking node [akka.tcp://JobServer@127.0.0.1:38707] as [Down]
[2018-04-03 11:58:49,029] ERROR akka.remote.EndpointWriter [] [akka://JobServer/system/endpointManager/reliableEndpointWriter-akka.tcp%3A%2F%2FJobServer%40127.0.0.1%3A38707-5/endpointWriter] - AssociationError
[akka.tcp://JobServer@127.0.0.1:45037] <- [akka.tcp://JobServer@127.0.0.1:38707]: Error [Shut down address: akka.tcp://JobServer@127.0.0.1:38707] [ akka.remote.ShutDownAssociation: Shut down address: akka.tcp://JobServer@127.0.0.1:38707 Caused by: akka.remote.transport.Transport$InvalidAssociationException: The remote system terminated the association because it is shutting down. ] [2018-04-03 11:58:49,536] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:45037] - Leader is removing unreachable node [akka.tcp://JobServer@127.0.0.1:38707] [2018-04-03 11:58:49,536] INFO Remoting [] [Remoting] - Address [akka.tcp://JobServer@127.0.0.1:38707] is now quarantined, all messages to this address will be delivered to dead letters. [2018-04-03 11:59:00,566] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting context with actor name jobManager-76-8f0f-c226f764f302 [2018-04-03 11:59:00,567] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Created working directory /var/log/spark-job-server/jobserver-knimeSparkContext6389946451965332973 for context knimeSparkContext [2018-04-03 11:59:00,567] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting to execute sub process [./manager_start.sh, /var/log/spark-job-server/jobserver-knimeSparkContext6389946451965332973, akka.tcp://JobServer@127.0.0.1:45037] [2018-04-03 11:59:01,949] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:45037] - Node [akka.tcp://JobServer@127.0.0.1:36905] is JOINING, roles [manager] [2018-04-03 11:59:02,537] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:45037] - Leader is moving node [akka.tcp://JobServer@127.0.0.1:36905] to [Up] [2018-04-03 11:59:02,559] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Received identify response, attempting to initialize context at akka.tcp://JobServer@127.0.0.1:36905/user/* [2018-04-03 11:59:02,560] INFO k.jobserver.JobResultActor [] [akka://JobServer/user/context-supervisor/$f] - Starting actor spark.jobserver.JobResultActor [2018-04-03 11:59:33,898] INFO AkkaClusterSupervisorActor [] [] - SparkContext knimeSparkContext joined [2018-04-03 12:00:02,109] INFO jobserver.DataManagerActor [] [akka://JobServer/user/data-manager] - Storing data in file prefix mysql-connector-java-5.1.39-bin.jar, 989497 bytes [2018-04-03 12:10:17,631] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Shutting down context knimeSparkContext [2018-04-03 12:10:17,632] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:45037] - Marking node [akka.tcp://JobServer@127.0.0.1:36905] as [Down] [2018-04-03 12:10:17,763] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Actor terminated: jobManager-76-8f0f-c226f764f302 [2018-04-03 12:10:17,767] ERROR akka.remote.EndpointWriter [] [akka://JobServer/system/endpointManager/reliableEndpointWriter-akka.tcp%3A%2F%2FJobServer%40127.0.0.1%3A36905-6/endpointWriter] - AssociationError [akka.tcp://JobServer@127.0.0.1:45037] <- [akka.tcp://JobServer@127.0.0.1:36905]: Error [Shut down address: akka.tcp://JobServer@127.0.0.1:36905] [ akka.remote.ShutDownAssociation: Shut down address: akka.tcp://JobServer@127.0.0.1:36905 Caused by: 
akka.remote.transport.Transport$InvalidAssociationException: The remote system terminated the association because it is shutting down. ] [2018-04-03 12:10:18,536] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:45037] - Leader is removing unreachable node [akka.tcp://JobServer@127.0.0.1:36905] [2018-04-03 12:10:18,536] INFO Remoting [] [Remoting] - Address [akka.tcp://JobServer@127.0.0.1:36905] is now quarantined, all messages to this address will be delivered to dead letters. [2018-04-03 12:10:20,572] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting context with actor name jobManager-f9-bd52-6205dd3a05de [2018-04-03 12:10:20,572] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Created working directory /var/log/spark-job-server/jobserver-knimeSparkContext798624668079866566 for context knimeSparkContext [2018-04-03 12:10:20,572] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting to execute sub process [./manager_start.sh, /var/log/spark-job-server/jobserver-knimeSparkContext798624668079866566, akka.tcp://JobServer@127.0.0.1:45037] [2018-04-03 12:10:21,943] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:45037] - Node [akka.tcp://JobServer@127.0.0.1:39616] is JOINING, roles [manager] [2018-04-03 12:10:22,536] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:45037] - Leader is moving node [akka.tcp://JobServer@127.0.0.1:39616] to [Up] [2018-04-03 12:10:22,559] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Received identify response, attempting to initialize context at akka.tcp://JobServer@127.0.0.1:39616/user/* [2018-04-03 12:10:22,560] INFO k.jobserver.JobResultActor [] [akka://JobServer/user/context-supervisor/$g] - Starting actor spark.jobserver.JobResultActor [2018-04-03 12:10:53,989] INFO AkkaClusterSupervisorActor [] [] - SparkContext knimeSparkContext joined [2018-04-03 12:11:05,952] INFO jobserver.DataManagerActor [] [akka://JobServer/user/data-manager] - Storing data in file prefix mysql-connector-java-5.1.39-bin.jar, 989497 bytes [2018-04-03 12:12:01,884] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Shutting down context knimeSparkContext [2018-04-03 12:12:01,884] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:45037] - Marking node [akka.tcp://JobServer@127.0.0.1:39616] as [Down] [2018-04-03 12:12:02,014] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Actor terminated: jobManager-f9-bd52-6205dd3a05de [2018-04-03 12:12:02,025] ERROR akka.remote.EndpointWriter [] [akka://JobServer/system/endpointManager/reliableEndpointWriter-akka.tcp%3A%2F%2FJobServer%40127.0.0.1%3A39616-7/endpointWriter] - AssociationError [akka.tcp://JobServer@127.0.0.1:45037] <- [akka.tcp://JobServer@127.0.0.1:39616]: Error [Shut down address: akka.tcp://JobServer@127.0.0.1:39616] [ akka.remote.ShutDownAssociation: Shut down address: akka.tcp://JobServer@127.0.0.1:39616 Caused by: akka.remote.transport.Transport$InvalidAssociationException: The remote system terminated the association because it is shutting down. 
] [2018-04-03 12:12:02,536] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:45037] - Leader is removing unreachable node [akka.tcp://JobServer@127.0.0.1:39616] [2018-04-03 12:12:02,536] INFO Remoting [] [Remoting] - Address [akka.tcp://JobServer@127.0.0.1:39616] is now quarantined, all messages to this address will be delivered to dead letters. [2018-04-03 12:12:04,770] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting context with actor name jobManager-76-b2c6-71ee342fcd78 [2018-04-03 12:12:04,770] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Created working directory /var/log/spark-job-server/jobserver-knimeSparkContext5330812187666193012 for context knimeSparkContext [2018-04-03 12:12:04,771] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting to execute sub process [./manager_start.sh, /var/log/spark-job-server/jobserver-knimeSparkContext5330812187666193012, akka.tcp://JobServer@127.0.0.1:45037] [2018-04-03 12:12:06,191] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:45037] - Node [akka.tcp://JobServer@127.0.0.1:40257] is JOINING, roles [manager] [2018-04-03 12:12:06,536] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:45037] - Leader is moving node [akka.tcp://JobServer@127.0.0.1:40257] to [Up] [2018-04-03 12:12:06,560] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Received identify response, attempting to initialize context at akka.tcp://JobServer@127.0.0.1:40257/user/* [2018-04-03 12:12:06,561] INFO k.jobserver.JobResultActor [] [akka://JobServer/user/context-supervisor/$h] - Starting actor spark.jobserver.JobResultActor [2018-04-03 12:12:38,066] INFO AkkaClusterSupervisorActor [] [] - SparkContext knimeSparkContext joined [2018-04-03 12:13:16,358] INFO jobserver.DataManagerActor [] [akka://JobServer/user/data-manager] - Storing data in file prefix mysql-connector-java-5.1.39-bin.jar, 989497 bytes BUILD-VERSION: Commit 0abe49552a6fcdf6a5aca565d624cda255342c33 on branch 0.6.2-KNIME_cdh-5.13 built on 2017-12-06 18:06:16+01:00 [2018-04-03 21:52:10,706] INFO spark.jobserver.JobServer$ [] [] - Starting JobServer with config { # system properties "ui" : { # system properties "enabled" : "true", # system properties "killEnabled" : "true" }, # system properties "app" : { # system properties "name" : "spark.jobserver.JobServer" }, # application.conf: 5 # spark web UI port "webUrlPort" : 8080, # system properties "submit" : { # system properties "deployMode" : "client" }, # system properties "serializer" : "org.apache.spark.serializer.KryoSerializer", # system properties "lineage" : { # system properties "enabled" : "false", # system properties "log" : { # system properties "dir" : "/var/log/spark/lineage" } }, # system properties "executor" : { # system properties "extraLibraryPath" : "/usr/lib/hadoop/lib/native", # system properties "uri" : "", # system properties "extraJavaOptions" : "-Dlog4j.configuration=file:/opt/spark-job-server/log4j-server.properties -DLOG_DIR=/var/log/spark-job-server" }, # merge of /opt/spark-job-server/environment.conf: 25,application.conf: 7 # ######################################################################### # Job server settings # ######################################################################### "jobserver" : { # 
/opt/spark-job-server/environment.conf: 27 # TCP port that the job server listens on for HTTP requests "port" : 8090, # merge of /opt/spark-job-server/environment.conf: 37,application.conf: 40 "sqldao" : { # /opt/spark-job-server/environment.conf: 38 "rootdir" : "/tmp/spark-job-server/sqldao/data", # application.conf: 42 # Slick database driver, full classpath "slick-driver" : "scala.slick.driver.H2Driver", # /opt/spark-job-server/environment.conf: 41 "dbcp" : { # /opt/spark-job-server/environment.conf: 45 "maxidle" : 10, # /opt/spark-job-server/environment.conf: 43 "maxactive" : 20, # /opt/spark-job-server/environment.conf: 42 "connectonstart" : true, # /opt/spark-job-server/environment.conf: 44 "minidle" : 1, # /opt/spark-job-server/environment.conf: 46 "initialsize" : 10 }, # application.conf: 45 # JDBC driver, full classpath "jdbc-driver" : "org.h2.Driver", # merge of /opt/spark-job-server/environment.conf: 39,application.conf: 52 # Full JDBC URL / init string, along with username and password. Sorry, needs to match above. # Substitutions may be used to launch job-server, but leave it out here in the default or tests won't pass "jdbc" : { # application.conf: 55 "password" : "", # application.conf: 54 "user" : "", # /opt/spark-job-server/environment.conf: 39 "url" : "jdbc:h2:/tmp/spark-job-server/sqldao/data/h2-db;AUTO_SERVER=TRUE;AUTO_RECONNECT=TRUE" } }, # application.conf: 87 # spark broadcst factory in yarn deployment # Versions prior to 1.1.0, spark default broadcast factory is org.apache.spark.broadcast.HttpBroadcastFactory. # Can't start multiple sparkContexts in the same JVM with HttpBroadcastFactory. "yarn-broadcast-factory" : "org.apache.spark.broadcast.TorrentBroadcastFactory", # /opt/spark-job-server/environment.conf: 52 # Storage directory for files that are uploaded to the server # via POST/data commands "datadao" : { # /opt/spark-job-server/environment.conf: 53 "rootdir" : "/tmp/spark-job-server/upload" }, # application.conf: 9 "bind-address" : "0.0.0.0", # /opt/spark-job-server/environment.conf: 65 # Timeouts for Spark context creation in seconds. In "yarn-client" mode, use # the first option, in all other modes, use the second one. If context # creation takes longer than the timeout, the jobserver closes the context. # Especially in yarn-client mode, context creation includes container # allocation, which can take a while. "yarn-context-creation-timeout" : "60 s", # /opt/spark-job-server/environment.conf: 66 "context-creation-timeout" : "60 s", # application.conf: 69 # The ask pattern timeout for Api "short-timeout" : "3 s", # /opt/spark-job-server/environment.conf: 58 # Number of jobs that can be run simultaneously per context. If not set, # defaults to number of cores on machine where Spark job server is running. "max-jobs-per-context" : 100, # /opt/spark-job-server/environment.conf: 36 # Class to use to persist data such as jars, applications, jobs, etc. 
# spark.jobserver.io.JobFileDAO uses the file system for persistence # spark.jobserver.io.JobSqlDAO uses an SQL database for persistence # "jobdao" : "spark.jobserver.io.JobSqlDAO", # /opt/spark-job-server/environment.conf: 30 # Directory where the job server stores uploaded jar files "jar-store-rootdir" : "/tmp/spark-job-server/jars", # application.conf: 24 "filedao" : { # application.conf: 25 "rootdir" : "/tmp/spark-jobserver/filedao/data" }, # application.conf: 15 # Number of job results to keep per JobResultActor/context "job-result-cache-size" : 5000, # /opt/spark-job-server/environment.conf: 71 # If true, a separate JVM is forked for each Spark context. # KERBEROS NOTE: In a Kerberos-enabled environment, you should set this to true # (as well as shiro authentication, see later in this file). "context-per-jvm" : true, # application.conf: 75 # Time out for job server to wait while creating named objects "named-object-creation-timeout" : "60 s" }, # merge of /opt/spark-job-server/environment.conf: 90,application.conf: 103 # Default settings for Spark contexts. These settings can be overridden on a # per-context basis. Please consult the Spark documentation for more details # on available settings. # Default settings for ad hoc as well as manually created contexts # You can add any Spark config params here, for example, spark.mesos.coarse = true "context-settings" : { # application.conf: 111 "streaming" : { # application.conf: 116 # if true, stops gracefully by waiting for the processing of all received data to be completed "stopGracefully" : true, # application.conf: 120 # if true, stops the SparkContext with the StreamingContext. The underlying SparkContext will be # stopped regardless of whether the StreamingContext has been started. "stopSparkContext" : true, # application.conf: 113 # Default batch interval for Spark Streaming contexts in milliseconds "batch_interval" : 1000 }, # application.conf: 127 # Timeout for SupervisorActor to wait for forked (separate JVM) contexts to initialize "context-init-timeout" : "60s", # application.conf: 109 # A zero-arg class implementing spark.jobserver.context.SparkContextFactory # Determines the type of jobs that can run in a SparkContext "context-factory" : "spark.jobserver.context.DefaultSparkContextFactory", # application.conf: 129 "passthrough" : { # application.conf: 130 "spark" : { # application.conf: 130 "driver" : { # application.conf: 130 "allowMultipleContexts" : true } } }, # /opt/spark-job-server/environment.conf: 106 # Required setting, that sets "spark.executor.memory" in the SparkConf. Sets # the amount of memory to use per Spark executor process, in the same format # as JVM memory strings (e.g. 512m, 2g). # # Note: In "yarn-client" mode, Spark will request this amount of memory per YARN # container, plus some additional overhead. "memory-per-node" : "1G", # /opt/spark-job-server/environment.conf: 98 # Required setting, that sets "spark.cores.max" in the SparkConf. This sets # the maximum amount of CPU cores to request for the Spark on the cluster # (not from each machine). # # IMPORTANT: Note that although required by job-server, this setting only # has an effect in Standalone and Mesos clusters. "num-cpu-cores" : 2 }, # /opt/spark-job-server/environment.conf: 82 # Sets the "spark.master" property in the SparkConf. KNIME recommends # "yarn-client" for production use and "local[4]" for debugging purposes. 
# master = "local[4]" # Run Spark locally with 4 worker threads "master" : "yarn-client", # system properties "dynamicAllocation" : { # system properties "enabled" : "true", # system properties "minExecutors" : "0", # system properties "executorIdleTimeout" : "60", # system properties "schedulerBacklogTimeout" : "1" }, # system properties "authenticate" : { # system properties "enableSaslEncryption" : "false" }, # system properties "sql" : { # system properties "queryExecutionListeners" : "com.cloudera.spark.lineage.ClouderaNavigatorListener" }, # system properties "shuffle" : { # system properties "encryption" : { # system properties "enabled" : "false" }, # system properties "service" : { # system properties "port" : "7337", # system properties "enabled" : "true" } }, # system properties "eventLog" : { # system properties "enabled" : "true", # system properties "dir" : "hdfs://ip-172-31-34-104.us-west-2.compute.internal:8020/user/spark/applicationHistory" }, # system properties "driver" : { # system properties "extraLibraryPath" : "/usr/lib/hadoop/lib/native", # system properties "extraJavaOptions" : "-XX:+UseConcMarkSweepGC\n -verbose:gc -XX:+PrintGCTimeStamps -Xloggc:/opt/spark-job-server/gc.out\n -XX:MaxPermSize=512m\n -XX:+CMSClassUnloadingEnabled -XX:MaxDirectMemorySize=512M -XX:+HeapDumpOnOutOfMemoryError -Djava.net.preferIPv4Stack=true -Dlog4j.configuration=file:/opt/spark-job-server/log4j-server.properties -DLOG_DIR=/var/log/spark-job-server -Dspark.executor.uri= ", # system properties "memory" : "2G" }, # application.conf: 93 # predefined Spark contexts # Below is an example, but do not uncomment it. Everything defined here is carried over to # deploy-time configs, so they will be created in all environments. :( "contexts" : {}, # system properties "extraListeners" : "com.cloudera.spark.lineage.ClouderaNavigatorListener", # system properties "yarn" : { # system properties "am" : { # system properties "extraLibraryPath" : "/usr/lib/hadoop/lib/native" }, # system properties "jar" : "local:/usr/lib/spark/lib/spark-assembly.jar", # system properties "historyServer" : { # system properties "allowTracking" : "true", # system properties "address" : "http://ip-172-31-34-104.us-west-2.compute.internal:18088" } }, # system properties "jars" : "file:/opt/spark-job-server/spark-job-server.jar" } [2018-04-03 21:52:10,709] INFO spark.jobserver.JobServer$ [] [] - Spray config: { # reference.conf: 1478 # After a `Timedout` message has been sent to the timeout handler and the # request still hasn't been completed after the time period set here # the server will complete the request itself with an error response. # Set to `infinite` to disable timeout timeouts. "timeout-timeout" : "2 s", # application.conf: 187 # ssl engine provider protocols "enabledProtocols" : [ # application.conf: 187 "SSLv3", # application.conf: 187 "TLSv1" ], # application.conf: 190 "pipelining-limit" : 2, # reference.conf: 1575 # The time period within which a connection handler must have been # registered after the bind handler has received a `Connected` event. # Set to `infinite` to disable. "registration-timeout" : "1s", # /opt/spark-job-server/environment.conf: 154 # parsing { # max-content-length = 200m # } "request-timeout" : "60 s", # reference.conf: 1600 "back-pressure" : { # reference.conf: 1604 # The reciprocal rate of requested Acks per NoAcks. E.g. the default value # '10' means that every 10th write request is acknowledged. 
This affects the # number of writes each connection has to buffer even in absence of back-pressure. "noack-rate" : 10, # reference.conf: 1609 # The lower limit the write queue size has to shrink to before reads are resumed. # Use 'infinite' to disable the low-watermark so that reading is resumed instantly # after the next successful write. "reading-low-watermark" : "infinite" }, # reference.conf: 1570 # The time period within which the TCP unbinding process must be completed. # Set to `infinite` to disable. "unbind-timeout" : "1s", # reference.conf: 1500 # Enables/disables support for statistics collection and querying. # Even though stats keeping overhead is small, # for maximum performance switch off when not needed. "stats-support" : "on", # reference.conf: 1490 # The path of the actor to send `spray.http.Timedout` messages to. # If empty all `Timedout` messages will go to the "regular" request # handling actor. "timeout-handler" : "", # /opt/spark-job-server/environment.conf: 155 "idle-timeout" : "120 s", # /opt/spark-job-server/environment.conf: 156 "request-chunk-aggregation-limit" : "200m", # reference.conf: 1449 # Enables/disables SSL encryption. # If enabled the server uses the implicit `ServerSSLEngineProvider` member # of the `Bind` command to create `SSLEngine` instances for the underlying # IO connection. "ssl-encryption" : "off", # application.conf: 192 # for maximum performance (prevents StopReading / ResumeReading messages to the IOBridge) # Needed for HTTP/1.0 requests with missing Host headers "default-host-header" : "spray.io:8765", # reference.conf: 1504 # Enables/disables the addition of a `Remote-Address` header # holding the clients (remote) IP address. "remote-address-header" : "off", # reference.conf: 1584 # The time after which a connection is aborted (RST) after a parsing error # occurred. The timeout prevents a connection which is already known to be # erroneous from receiving evermore data even if all of the data will be ignored. # However, in case of a connection abortion the client usually doesn't properly # receive the error response. This timeout is a trade-off which allows the client # some time to finish its request and receive a proper error response before the # connection is forcibly closed to free resources. "parsing-error-abort-timeout" : "2s", # merge of application.conf: 195,reference.conf: 1750 # The (default) configuration of the HTTP message parser for the server and # the client. # IMPORTANT: These settings (i.e. children of `spray.can.parsing`) can't be directly # overridden in `application.conf` to change the parser settings for client and server # altogether (see https://github.com/spray/spray/issues/346). Instead, override the # concrete settings beneath `spray.can.server.parsing` and `spray.can.client.parsing` # where these settings are copied to. "parsing" : { # reference.conf: 1758 "max-chunk-ext-length" : 256, # reference.conf: 1759 "max-chunk-size" : "1m", # application.conf: 195 # Increase this in order to upload bigger job jars "max-content-length" : "30m", # reference.conf: 1756 "max-header-count" : 64, # reference.conf: 1773 # Sets the strictness mode for parsing request target URIs. 
# The following values are defined: # # `strict`: RFC3986-compliant URIs are required, # a 400 response is triggered on violations # # `relaxed`: all visible 7-Bit ASCII chars are allowed # # `relaxed-with-raw-query`: like `relaxed` but additionally # the URI query is not parsed, but delivered as one raw string # as the `key` value of a single Query structure element. # "uri-parsing-mode" : "strict", # reference.conf: 1753 "max-response-reason-length" : 64, # reference.conf: 1783 # Enables/disables the logging of warning messages in case an incoming # message (request or response) contains an HTTP header which cannot be # parsed into its high-level model class due to incompatible syntax. # Note that, independently of this settings, spray will accept messages # with such headers as long as the message as a whole would still be legal # under the HTTP specification even without this header. # If a header cannot be parsed into a high-level model instance it will be # provided as a `RawHeader`. "illegal-header-warnings" : "on", # reference.conf: 1755 "max-header-value-length" : "8k", # reference.conf: 1787 # limits for the number of different values per header type that the # header cache will hold "header-cache" : { # reference.conf: 1792 "If-Modified-Since" : 0, # reference.conf: 1794 "If-Range" : 0, # reference.conf: 1788 "default" : 12, # reference.conf: 1790 "Date" : 0, # reference.conf: 1789 "Content-MD5" : 0, # reference.conf: 1796 "User-Agent" : 32, # reference.conf: 1795 "If-Unmodified-Since" : 0, # reference.conf: 1793 "If-None-Match" : 0, # reference.conf: 1791 "If-Match" : 0 }, # reference.conf: 1752 # The limits for the various parts of the HTTP message parser. "max-uri-length" : "2k", # reference.conf: 1754 "max-header-name-length" : 64, # reference.conf: 1807 # Enables/disables inclusion of an SSL-Session-Info header in parsed # messages over SSL transports (i.e., HttpRequest on server side and # HttpResponse on client side). "ssl-session-info-header" : "off", # reference.conf: 1802 # Sets the size starting from which incoming http-messages will be delivered # in chunks regardless of whether chunking is actually used on the wire. # Set to infinite to disable auto chunking. "incoming-auto-chunking-threshold-size" : "infinite" }, # reference.conf: 1566 # The time period within which the TCP binding process must be completed. # Set to `infinite` to disable. "bind-timeout" : "1s", # reference.conf: 1536 # Enables/disables the returning of more detailed error messages to # the client in the error response. # Should be disabled for browser-facing APIs due to the risk of XSS attacks # and (probably) enabled for internal or non-browser APIs. # Note that spray will always produce log messages containing the full # error details. "verbose-error-messages" : "off", # reference.conf: 1516 # Enables/disables automatic handling of HEAD requests. # If this setting is enabled the server dispatches HEAD requests as GET # requests to the application and automatically strips off all message # bodies from outgoing responses. # Note that, even when this setting is off the server will never send # out message bodies on responses to HEAD requests. "transparent-head-requests" : "on", # application.conf: 183 "keystoreType" : "JKS", # reference.conf: 1541 # Enables/disables the logging of the full (potentially multiple line) # error message to the server logs. # If disabled only a single line will be logged. 
"verbose-error-logging" : "off", # reference.conf: 1598 # Enables/disables automatic back-pressure handling by write buffering and # receive throttling "automatic-back-pressure-handling" : "on", # reference.conf: 1443-2058 # Always contains the deployed version of spray. # Referenced, for example, from the `spray.can.server.server-header` setting. "server-header" : "spray-can/1.2.3", # application.conf: 185 # key manager factory provider "provider" : "SunX509", # reference.conf: 1613 # Enables more verbose DEBUG logging for debugging SSL related issues. "ssl-tracing" : "off", # reference.conf: 1562 # For HTTPS connections this setting specified the maximum number of # bytes that are encrypted in one go. Large responses are broken down in # chunks of this size so as to already begin sending before the response has # been encrypted entirely. "max-encryption-chunk-size" : "1m", # reference.conf: 1508 # Enables/disables the addition of a `Raw-Request-URI` header holding the # original raw request URI as the client has sent it. "raw-request-uri-header" : "off", # application.conf: 182 # see http://docs.oracle.com/javase/7/docs/technotes/guides/security/StandardNames.html#SSLContext for more examples # typical are either SSL or TLS "encryptionType" : "SSL", # reference.conf: 1485 # The period during which a service must respond to a `ChunkedRequestStart` message # with a `RegisterChunkHandler` message. During the registration period reading from # the network is suspended. It is still possible that some chunks have already been # received which will be buffered until the registration is received or the timeout is # triggered. If the timeout is triggered the connection is immediately aborted. "chunkhandler-registration-timeout" : "500 ms", # reference.conf: 1556 # The initial size if the buffer to render the response headers in. # Can be used for fine-tuning response rendering performance but probably # doesn't have to be fiddled with in most applications. "response-header-size-hint" : 512, # reference.conf: 1528 # Enables/disables an alternative response streaming mode that doesn't # use `Transfer-Encoding: chunked` but rather renders the individual # MessageChunks coming in from the application as parts of the original # response entity. # Enabling this mode causes all connections to be closed after a streaming # response has been finished since there is no other way to signal the # response end to the client. # Note that chunkless-streaming is implicitly enabled when streaming # responses to HTTP/1.0 clients (since they don't support # `Transfer-Encoding: chunked`) "chunkless-streaming" : "off", # reference.conf: 1495 # The "granularity" of timeout checking for both idle connections timeouts # as well as request timeouts, should rarely be needed to modify. # If set to `infinite` request and connection timeout checking is disabled. "reaping-cycle" : "250 ms" } [2018-04-03 21:52:11,288] INFO ka.event.slf4j.Slf4jLogger [] [] - Slf4jLogger started [2018-04-03 21:52:11,542] INFO Remoting [] [Remoting] - Starting remoting [2018-04-03 21:52:11,847] INFO Remoting [] [Remoting] - Remoting started; listening on addresses :[akka.tcp://JobServer@127.0.0.1:37451] [2018-04-03 21:52:11,849] INFO Remoting [] [Remoting] - Remoting now listens on addresses: [akka.tcp://JobServer@127.0.0.1:37451] [2018-04-03 21:52:11,878] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:37451] - Starting up... 
[2018-04-03 21:52:11,983] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:37451] - Registered cluster JMX MBean [akka:type=Cluster] [2018-04-03 21:52:11,984] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:37451] - Started up successfully [2018-04-03 21:52:12,006] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:37451] - No seed-nodes configured, manual cluster join required [2018-04-03 21:52:12,905] INFO ark.jobserver.io.JobSqlDAO [] [] - rootDir is /tmp/spark-job-server/sqldao/data [2018-04-03 21:52:13,091] INFO ternal.util.VersionPrinter [] [] - Flyway 3.2.1 by Boxfuse [2018-04-03 21:52:13,733] INFO dbsupport.DbSupportFactory [] [] - Database: jdbc:h2:/tmp/spark-job-server/sqldao/data/h2-db (H2 1.3) [2018-04-03 21:52:13,909] INFO nternal.command.DbValidate [] [] - Validated 1 migration (execution time 00:00.069s) [2018-04-03 21:52:13,926] INFO atatable.MetaDataTableImpl [] [] - Creating Metadata table: "PUBLIC"."schema_version" [2018-04-03 21:52:13,943] INFO internal.command.DbMigrate [] [] - Current version of schema "PUBLIC": << Empty Schema >> [2018-04-03 21:52:13,943] INFO internal.command.DbMigrate [] [] - Migrating schema "PUBLIC" to version 0.7.0 - init tables [2018-04-03 21:52:13,974] INFO internal.command.DbMigrate [] [] - Successfully applied 1 migration to schema "PUBLIC" (execution time 00:00.050s). [2018-04-03 21:52:14,082] INFO ark.jobserver.io.JobSqlDAO [] [] - Running dummy query to initialize dbcp. [2018-04-03 21:52:15,039] INFO k.jobserver.io.JobDAOActor [] [akka://JobServer/user/dao-manager] - Starting actor spark.jobserver.io.JobDAOActor [2018-04-03 21:52:15,056] INFO jobserver.DataManagerActor [] [akka://JobServer/user/data-manager] - Starting actor spark.jobserver.DataManagerActor [2018-04-03 21:52:15,059] INFO spark.jobserver.JarManager [] [akka://JobServer/user/jar-manager] - Starting actor spark.jobserver.JarManager [2018-04-03 21:52:15,062] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting actor spark.jobserver.AkkaClusterSupervisorActor [2018-04-03 21:52:15,065] INFO ark.jobserver.JobInfoActor [] [akka://JobServer/user/job-info] - Starting actor spark.jobserver.JobInfoActor [2018-04-03 21:52:15,108] INFO AkkaClusterSupervisorActor [] [] - AkkaClusterSupervisor initialized on akka.tcp://JobServer@127.0.0.1:37451 [2018-04-03 21:52:15,108] INFO k.jobserver.JobResultActor [] [akka://JobServer/user/context-supervisor/global-result-actor] - Starting actor spark.jobserver.JobResultActor [2018-04-03 21:52:15,140] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:37451] - Node [akka.tcp://JobServer@127.0.0.1:37451] is JOINING, roles [supervisor] [2018-04-03 21:52:15,469] INFO spark.jobserver.WebApi [] [] - No authentication. [2018-04-03 21:52:15,771] INFO spark.jobserver.WebApi [] [] - Starting browser web service... 
[2018-04-03 21:52:16,070] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:37451] - Leader is moving node [akka.tcp://JobServer@127.0.0.1:37451] to [Up] [2018-04-03 21:52:17,104] INFO ay.can.server.HttpListener [] [akka://JobServer/user/IO-HTTP/listener-0] - Bound to /0.0.0.0:8090 [2018-04-03 21:52:17,108] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [akka.io.Tcp$Bound] from Actor[akka://JobServer/user/IO-HTTP/listener-0#139983251] to Actor[akka://JobServer/deadLetters] was not delivered. [1] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. [2018-04-03 22:09:52,816] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting context with actor name jobManager-2c-a76d-87224c8bd90c [2018-04-03 22:09:52,827] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Created working directory /var/log/spark-job-server/jobserver-knimeSparkContext357006239163163847 for context knimeSparkContext [2018-04-03 22:09:52,840] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting to execute sub process [./manager_start.sh, /var/log/spark-job-server/jobserver-knimeSparkContext357006239163163847, akka.tcp://JobServer@127.0.0.1:37451] [2018-04-03 22:09:54,269] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:37451] - Node [akka.tcp://JobServer@127.0.0.1:33690] is JOINING, roles [manager] [2018-04-03 22:09:54,740] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:37451] - Leader is moving node [akka.tcp://JobServer@127.0.0.1:33690] to [Up] [2018-04-03 22:09:54,788] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Received identify response, attempting to initialize context at akka.tcp://JobServer@127.0.0.1:33690/user/* [2018-04-03 22:09:54,790] INFO k.jobserver.JobResultActor [] [akka://JobServer/user/context-supervisor/$a] - Starting actor spark.jobserver.JobResultActor [2018-04-03 22:10:26,408] INFO AkkaClusterSupervisorActor [] [] - SparkContext knimeSparkContext joined [2018-04-03 22:10:26,482] INFO spark.jobserver.JarManager [] [akka://JobServer/user/jar-manager] - Storing jar for app knimeJobs_013620532e91f948_65dffd0366301f75b3c2dabe274a502a4f448c88_spark-1.6.cdh5_9, 398150 bytes [2018-04-03 22:10:26,982] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [spark.jobserver.JobInfoActor$JobConfigStored$] from Actor[akka://JobServer/user/job-info#18820537] to Actor[akka://JobServer/deadLetters] was not delivered. [2] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. [2018-04-03 22:11:02,957] INFO jobserver.DataManagerActor [] [akka://JobServer/user/data-manager] - Storing data in file prefix mysql-connector-java-5.1.39-bin.jar, 989497 bytes [2018-04-03 22:11:03,075] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [spark.jobserver.JobInfoActor$JobConfigStored$] from Actor[akka://JobServer/user/job-info#18820537] to Actor[akka://JobServer/deadLetters] was not delivered. [3] dead letters encountered. 
This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. [2018-04-03 22:11:05,025] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Actor terminated: jobManager-2c-a76d-87224c8bd90c [2018-04-03 22:11:05,028] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:37451] - Marking node [akka.tcp://JobServer@127.0.0.1:33690] as [Down] [2018-04-03 22:11:05,041] INFO akka.actor.LocalActorRef [] [akka://JobServer/system/transports/akkaprotocolmanager.tcp0/akkaProtocol-tcp%3A%2F%2FJobServer%40127.0.0.1%3A37650-1] - Message [akka.remote.transport.AssociationHandle$Disassociated] from Actor[akka://JobServer/deadLetters] to Actor[akka://JobServer/system/transports/akkaprotocolmanager.tcp0/akkaProtocol-tcp%3A%2F%2FJobServer%40127.0.0.1%3A37650-1#-693158169] was not delivered. [4] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. [2018-04-03 22:11:05,060] ERROR akka.remote.EndpointWriter [] [akka://JobServer/system/endpointManager/reliableEndpointWriter-akka.tcp%3A%2F%2FJobServer%40127.0.0.1%3A33690-0/endpointWriter] - AssociationError [akka.tcp://JobServer@127.0.0.1:37451] <- [akka.tcp://JobServer@127.0.0.1:33690]: Error [Shut down address: akka.tcp://JobServer@127.0.0.1:33690] [ akka.remote.ShutDownAssociation: Shut down address: akka.tcp://JobServer@127.0.0.1:33690 Caused by: akka.remote.transport.Transport$InvalidAssociationException: The remote system terminated the association because it is shutting down. ] [2018-04-03 22:11:05,060] INFO akka.actor.LocalActorRef [] [akka://JobServer/system/transports/akkaprotocolmanager.tcp0/akkaProtocol-tcp%3A%2F%2FJobServer%40127.0.0.1%3A37650-1] - Message [akka.remote.transport.ActorTransportAdapter$DisassociateUnderlying] from Actor[akka://JobServer/deadLetters] to Actor[akka://JobServer/system/transports/akkaprotocolmanager.tcp0/akkaProtocol-tcp%3A%2F%2FJobServer%40127.0.0.1%3A37650-1#-693158169] was not delivered. [5] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. [2018-04-03 22:11:05,734] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:37451] - Leader is removing unreachable node [akka.tcp://JobServer@127.0.0.1:33690] [2018-04-03 22:11:05,735] INFO Remoting [] [Remoting] - Address [akka.tcp://JobServer@127.0.0.1:33690] is now quarantined, all messages to this address will be delivered to dead letters. [2018-04-03 22:11:05,884] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [akka.cluster.ClusterHeartbeatReceiver$EndHeartbeat] from Actor[akka://JobServer/system/cluster/core/daemon/heartbeatSender#65430477] to Actor[akka://JobServer/deadLetters] was not delivered. [6] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. [2018-04-03 22:11:06,883] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [akka.cluster.ClusterHeartbeatReceiver$EndHeartbeat] from Actor[akka://JobServer/system/cluster/core/daemon/heartbeatSender#65430477] to Actor[akka://JobServer/deadLetters] was not delivered. [7] dead letters encountered. 
This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. [2018-04-03 22:11:07,884] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [akka.cluster.ClusterHeartbeatReceiver$EndHeartbeat] from Actor[akka://JobServer/system/cluster/core/daemon/heartbeatSender#65430477] to Actor[akka://JobServer/deadLetters] was not delivered. [8] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. [2018-04-03 22:11:08,883] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [akka.cluster.ClusterHeartbeatReceiver$EndHeartbeat] from Actor[akka://JobServer/system/cluster/core/daemon/heartbeatSender#65430477] to Actor[akka://JobServer/deadLetters] was not delivered. [9] dead letters encountered. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. [2018-04-03 22:11:09,883] INFO r$RemoteDeadLetterActorRef [] [akka://JobServer/deadLetters] - Message [akka.cluster.ClusterHeartbeatReceiver$EndHeartbeat] from Actor[akka://JobServer/system/cluster/core/daemon/heartbeatSender#65430477] to Actor[akka://JobServer/deadLetters] was not delivered. [10] dead letters encountered, no more dead letters will be logged. This logging can be turned off or adjusted with configuration settings 'akka.log-dead-letters' and 'akka.log-dead-letters-during-shutdown'. [2018-04-03 22:14:36,025] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting context with actor name jobManager-19-90af-860f5e28665b [2018-04-03 22:14:36,025] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Created working directory /var/log/spark-job-server/jobserver-knimeSparkContext4904026815027454913 for context knimeSparkContext [2018-04-03 22:14:36,026] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting to execute sub process [./manager_start.sh, /var/log/spark-job-server/jobserver-knimeSparkContext4904026815027454913, akka.tcp://JobServer@127.0.0.1:37451] [2018-04-03 22:14:37,391] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:37451] - Node [akka.tcp://JobServer@127.0.0.1:41701] is JOINING, roles [manager] [2018-04-03 22:14:37,734] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:37451] - Leader is moving node [akka.tcp://JobServer@127.0.0.1:41701] to [Up] [2018-04-03 22:14:37,758] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Received identify response, attempting to initialize context at akka.tcp://JobServer@127.0.0.1:41701/user/* [2018-04-03 22:14:37,758] INFO k.jobserver.JobResultActor [] [akka://JobServer/user/context-supervisor/$b] - Starting actor spark.jobserver.JobResultActor [2018-04-03 22:15:09,213] INFO AkkaClusterSupervisorActor [] [] - SparkContext knimeSparkContext joined [2018-04-03 22:15:10,639] INFO jobserver.DataManagerActor [] [akka://JobServer/user/data-manager] - Storing data in file prefix mysql-connector-java-5.1.39-bin.jar, 989497 bytes [2018-04-03 22:15:46,450] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Actor terminated: jobManager-19-90af-860f5e28665b [2018-04-03 22:15:46,451] INFO Cluster(akka://JobServer) [] 
[Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:37451] - Marking node [akka.tcp://JobServer@127.0.0.1:41701] as [Down] [2018-04-03 22:15:46,460] ERROR akka.remote.EndpointWriter [] [akka://JobServer/system/endpointManager/reliableEndpointWriter-akka.tcp%3A%2F%2FJobServer%40127.0.0.1%3A41701-1/endpointWriter] - AssociationError [akka.tcp://JobServer@127.0.0.1:37451] <- [akka.tcp://JobServer@127.0.0.1:41701]: Error [Shut down address: akka.tcp://JobServer@127.0.0.1:41701] [ akka.remote.ShutDownAssociation: Shut down address: akka.tcp://JobServer@127.0.0.1:41701 Caused by: akka.remote.transport.Transport$InvalidAssociationException: The remote system terminated the association because it is shutting down. ] [2018-04-03 22:15:46,733] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:37451] - Leader is removing unreachable node [akka.tcp://JobServer@127.0.0.1:41701] [2018-04-03 22:15:46,734] INFO Remoting [] [Remoting] - Address [akka.tcp://JobServer@127.0.0.1:41701] is now quarantined, all messages to this address will be delivered to dead letters. [2018-04-03 22:16:36,743] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting context with actor name jobManager-0a-8d69-702c2b49e45b [2018-04-03 22:16:36,743] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Created working directory /var/log/spark-job-server/jobserver-knimeSparkContext6789448764192250377 for context knimeSparkContext [2018-04-03 22:16:36,744] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting to execute sub process [./manager_start.sh, /var/log/spark-job-server/jobserver-knimeSparkContext6789448764192250377, akka.tcp://JobServer@127.0.0.1:37451] [2018-04-03 22:16:38,220] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:37451] - Node [akka.tcp://JobServer@127.0.0.1:39371] is JOINING, roles [manager] [2018-04-03 22:16:38,734] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:37451] - Leader is moving node [akka.tcp://JobServer@127.0.0.1:39371] to [Up] [2018-04-03 22:16:38,760] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Received identify response, attempting to initialize context at akka.tcp://JobServer@127.0.0.1:39371/user/* [2018-04-03 22:16:38,761] INFO k.jobserver.JobResultActor [] [akka://JobServer/user/context-supervisor/$c] - Starting actor spark.jobserver.JobResultActor [2018-04-03 22:17:10,038] INFO AkkaClusterSupervisorActor [] [] - SparkContext knimeSparkContext joined [2018-04-03 22:17:40,520] INFO jobserver.DataManagerActor [] [akka://JobServer/user/data-manager] - Storing data in file prefix mysql-connector-java-5.1.39-bin.jar, 989497 bytes [2018-04-03 22:17:47,278] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Actor terminated: jobManager-0a-8d69-702c2b49e45b [2018-04-03 22:17:47,278] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:37451] - Marking node [akka.tcp://JobServer@127.0.0.1:39371] as [Down] [2018-04-03 22:17:47,288] ERROR akka.remote.EndpointWriter [] [akka://JobServer/system/endpointManager/reliableEndpointWriter-akka.tcp%3A%2F%2FJobServer%40127.0.0.1%3A39371-2/endpointWriter] - AssociationError [akka.tcp://JobServer@127.0.0.1:37451] <- 
[akka.tcp://JobServer@127.0.0.1:39371]: Error [Shut down address: akka.tcp://JobServer@127.0.0.1:39371] [ akka.remote.ShutDownAssociation: Shut down address: akka.tcp://JobServer@127.0.0.1:39371 Caused by: akka.remote.transport.Transport$InvalidAssociationException: The remote system terminated the association because it is shutting down. ] [2018-04-03 22:17:47,734] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:37451] - Leader is removing unreachable node [akka.tcp://JobServer@127.0.0.1:39371] [2018-04-03 22:17:47,734] INFO Remoting [] [Remoting] - Address [akka.tcp://JobServer@127.0.0.1:39371] is now quarantined, all messages to this address will be delivered to dead letters. [2018-04-03 22:19:03,197] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting context with actor name jobManager-95-aba4-79fc5ef762dc [2018-04-03 22:19:03,197] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Created working directory /var/log/spark-job-server/jobserver-knimeSparkContext6227224847260639485 for context knimeSparkContext [2018-04-03 22:19:03,197] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting to execute sub process [./manager_start.sh, /var/log/spark-job-server/jobserver-knimeSparkContext6227224847260639485, akka.tcp://JobServer@127.0.0.1:37451] [2018-04-03 22:19:04,527] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:37451] - Node [akka.tcp://JobServer@127.0.0.1:40508] is JOINING, roles [manager] [2018-04-03 22:19:04,734] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:37451] - Leader is moving node [akka.tcp://JobServer@127.0.0.1:40508] to [Up] [2018-04-03 22:19:04,761] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Received identify response, attempting to initialize context at akka.tcp://JobServer@127.0.0.1:40508/user/* [2018-04-03 22:19:04,762] INFO k.jobserver.JobResultActor [] [akka://JobServer/user/context-supervisor/$d] - Starting actor spark.jobserver.JobResultActor [2018-04-03 22:19:36,254] INFO AkkaClusterSupervisorActor [] [] - SparkContext knimeSparkContext joined [2018-04-03 22:19:37,580] INFO jobserver.DataManagerActor [] [akka://JobServer/user/data-manager] - Storing data in file prefix mysql-connector-java-5.1.39-bin.jar, 989497 bytes [2018-04-03 22:20:07,408] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Actor terminated: jobManager-95-aba4-79fc5ef762dc [2018-04-03 22:20:07,409] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:37451] - Marking node [akka.tcp://JobServer@127.0.0.1:40508] as [Down] [2018-04-03 22:20:07,413] ERROR akka.remote.EndpointWriter [] [akka://JobServer/system/endpointManager/reliableEndpointWriter-akka.tcp%3A%2F%2FJobServer%40127.0.0.1%3A40508-3/endpointWriter] - AssociationError [akka.tcp://JobServer@127.0.0.1:37451] <- [akka.tcp://JobServer@127.0.0.1:40508]: Error [Shut down address: akka.tcp://JobServer@127.0.0.1:40508] [ akka.remote.ShutDownAssociation: Shut down address: akka.tcp://JobServer@127.0.0.1:40508 Caused by: akka.remote.transport.Transport$InvalidAssociationException: The remote system terminated the association because it is shutting down. 
] [2018-04-03 22:20:07,734] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:37451] - Leader is removing unreachable node [akka.tcp://JobServer@127.0.0.1:40508] [2018-04-03 22:20:07,734] INFO Remoting [] [Remoting] - Address [akka.tcp://JobServer@127.0.0.1:40508] is now quarantined, all messages to this address will be delivered to dead letters. [2018-04-03 22:23:06,991] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting context with actor name jobManager-80-94da-544ef9eb21cf [2018-04-03 22:23:06,992] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Created working directory /var/log/spark-job-server/jobserver-knimeSparkContext739182012723538576 for context knimeSparkContext [2018-04-03 22:23:06,992] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Starting to execute sub process [./manager_start.sh, /var/log/spark-job-server/jobserver-knimeSparkContext739182012723538576, akka.tcp://JobServer@127.0.0.1:37451] [2018-04-03 22:23:08,374] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:37451] - Node [akka.tcp://JobServer@127.0.0.1:32948] is JOINING, roles [manager] [2018-04-03 22:23:08,734] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:37451] - Leader is moving node [akka.tcp://JobServer@127.0.0.1:32948] to [Up] [2018-04-03 22:23:08,762] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Received identify response, attempting to initialize context at akka.tcp://JobServer@127.0.0.1:32948/user/* [2018-04-03 22:23:08,763] INFO k.jobserver.JobResultActor [] [akka://JobServer/user/context-supervisor/$e] - Starting actor spark.jobserver.JobResultActor [2018-04-03 22:23:40,234] INFO AkkaClusterSupervisorActor [] [] - SparkContext knimeSparkContext joined [2018-04-03 22:23:41,754] INFO jobserver.DataManagerActor [] [akka://JobServer/user/data-manager] - Storing data in file prefix mysql-connector-java-5.1.39-bin.jar, 989497 bytes [2018-04-03 22:24:16,413] INFO AkkaClusterSupervisorActor [] [akka://JobServer/user/context-supervisor] - Actor terminated: jobManager-80-94da-544ef9eb21cf [2018-04-03 22:24:16,413] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:37451] - Marking node [akka.tcp://JobServer@127.0.0.1:32948] as [Down] [2018-04-03 22:24:16,423] ERROR akka.remote.EndpointWriter [] [akka://JobServer/system/endpointManager/reliableEndpointWriter-akka.tcp%3A%2F%2FJobServer%40127.0.0.1%3A32948-4/endpointWriter] - AssociationError [akka.tcp://JobServer@127.0.0.1:37451] <- [akka.tcp://JobServer@127.0.0.1:32948]: Error [Shut down address: akka.tcp://JobServer@127.0.0.1:32948] [ akka.remote.ShutDownAssociation: Shut down address: akka.tcp://JobServer@127.0.0.1:32948 Caused by: akka.remote.transport.Transport$InvalidAssociationException: The remote system terminated the association because it is shutting down. ] [2018-04-03 22:24:16,733] INFO Cluster(akka://JobServer) [] [Cluster(akka://JobServer)] - Cluster Node [akka.tcp://JobServer@127.0.0.1:37451] - Leader is removing unreachable node [akka.tcp://JobServer@127.0.0.1:32948] [2018-04-03 22:24:16,733] INFO Remoting [] [Remoting] - Address [akka.tcp://JobServer@127.0.0.1:32948] is now quarantined, all messages to this address will be delivered to dead letters.