; Sample worker node parameters
[sample]
parameter = Sample Parameter
iterations = 10
sleep_sec = 2

; General purpose worker node parameters
[server]

; Maximum number of jobs (threads) that can be served simultaneously.
; This parameter defines job parallelism. For computationally intensive
; algorithms this value should not be more than the number of CPUs.
; If set to 'auto', the node will determine the number of CPUs on the
; system and use this number.
; Default value is 'auto'
;max_threads = 4

; Initial number of threads created for incoming jobs.
; Default number is 1.
;init_threads = 1

; TCP/IP and UDP port number for control messages (like shutdown, version)
; and job notifications. It runs a special control thread for incoming
; administrative requests (from netschedule_control and netschedule_admin).
; Can take a port range (ex. 9300-9310). In this case the system will try
; to find an available port in the given range.
control_port = 9300

; Server side logging. A worker node can ask its context if this flag is
; set to true
log = true

; Internal.
; Delay in seconds node task dispatcher waits for free
; space in the job queue. Lower value usually gives better response
; to shutdown command (CPU traded off)
thread_pool_timeout = 5

; Time worker node spends waiting for new jobs without connecting to
; the netschedule server queue. Server sends UDP requests to wake the
; node up. Bigger values of this parameter reduce the netschedule server
; load at the expense of job delivery latency (because of
; potential loss of UDP packets)
;job_wait_timeout = 10

; The max total number of jobs after which the node will shut itself down.
; Restarting the node periodically is useful due to accumulating heap
; fragmentation, possible leaks, etc.
; Default is 0 - means unlimited number of jobs.
;max_total_jobs = 100

; The max number of failed jobs after which the node will shut itself down.
; Default is 0 - means unlimited number of failed jobs.
;max_failed_jobs = 3

; Sets the maximum number of jobs with the same client IP address running in parallel.
; While this limit is reached for a client IP address, the worker node immediately returns
; all new jobs with the same client IP address back to the queue, also blacklisting them.
; Blacklisted jobs will not be given again to the same worker node for some time (server configured).
;
; Default value is 0, which means that jobs per client IP address are not limited
;
;max_jobs_per_client_ip = 10

; Sets the maximum number of jobs with the same session ID running in parallel.
; While this limit is reached for a session ID, the worker node immediately returns
; all new jobs with the same session ID back to the queue, also blacklisting them.
; Blacklisted jobs will not be given again to the same worker node for some time (server configured).
;
; Default value is 0, which means that jobs per session ID are not limited
;
;max_jobs_per_session_id = 10

; Sets the maximum limit for total memory consumption by this worker node.
; When this limit is reached, the worker node shuts down.
; The value can contain multiple-byte units: G(giga-), MB(mega-), KiB (kibi-) etc.
;
; Default value is 0, which means that memory consumption is not limited
;
;total_memory_limit = 1.5GB

; Sets the maximum limit for total runtime of this worker node (in seconds).
; When this limit is reached, the worker node shuts down.
;
; Default value is 0, which means that runtime is not limited
;
;total_time_limit = 3600

; Allows this worker node to detect infinite loops in job execution.
; If a job is being executed for more than the specified time,
; it is assumed to be stuck in an infinite loop.
; If this happens, the worker node enters shutdown mode:
; After all other running jobs are either done or also assumed stuck,
; the worker node shuts down (may end up killing itself).
;
; Default value is 0, which means that the node will not detect infinite loops.
;
;infinite_loop_time = 600

; When true, server transforms into a daemon, detached from the
; current program group (for UNIX only)
;daemon = true

; The list of worker nodes which this node will check before attempting
; to retrieve a job from the NetSchedule queue. If at least one of these
; worker nodes has an idle thread, this node will not connect to the queue
; for a job. This node and all nodes from the given list must be connected
; to the same NetSchedule service, the same queue and must run the same job.
; If the list is empty (default) then this node is a master.
;master_nodes = service3:9300, service2:9300

; List of network hosts which are allowed admin access to the worker node.
; If this worker node is controlled by grid_node_watcher.sh don't forget
; to add "localhost" to this list. By default any host is allowed.
;admin_hosts = localhost service1 service2 service3

; Time delay (in seconds) between the node entering idle mode (all jobs are done
; and there are no new jobs in the queue) and the idle task being executed.
; Cannot be less than 1 sec. Default value is 30 sec.
;idle_run_delay = 30

; The node will automatically shut itself down if it is idle for
; a continuous period of time longer than this (in seconds):
; Default: 0 - means never auto shutdown
;auto_shutdown_if_idle = 20

; Specifies if the framework should reuse an instance of the job class.
; Setting this parameter to true means that only one instance of the job class
; will be created for each execution thread. False means that an instance of the job
; class will be created for each incoming job.
; Default value is false.
;reuse_job_object = true

; Time in seconds. Specifies how often the node should check states of jobs
; it is processing. It is used as a feedback from the client to see if it wants
; to cancel the job execution
check_status_period = 4

; Default timeout before the job is terminated in case of pullback.
; This value can be overridden by the '--timeout' option specified
; with 'grid_cli suspend --pullback'.
default_pullback_timeout = 0

; Specifies how often (in seconds) the worker node retries to commit a job after communication errors.
; The worker node gives up retrying either after it takes more than queue timeout (server configured)
; or if server successfully receives corresponding commit request but responds with an error.
;
; Minimum allowed value is 1 second, default is 2 seconds.
;
;commit_job_interval = 5

; If set to true, the worker node forks at start.
; Parent process is only used to clear the node on servers on exit and child process does everything else.
; Thus, the node is reliably cleared even if child process crashes/is killed (for UNIX only).
;
; Default value is false (no forking).
;
;reliable_cleanup = true

[gw_debug]
; NOTE(review): the file name patterns and the execute_requests value in this
; section look like they have lost their placeholder names - confirm against
; the original documentation before relying on them.

; Prefix for all debug files
run_name = debug_run

; Whether to gather requests or to execute them
;mode = gather

; Number of requests to gather (inferred from the key name - confirm)
; default is 1.
; Input blobs will be dumped to "..[1..N].inp"
; Output blobs will be dumped to "..[1..N].out"
; Messages will be dumped to "..[1..N].msg"
; Diagnostics will be written to "..log"
;gather_nrequests = 2

; Whether to gather requests or to execute them
;mode = execute

; Process requests from the following files
; [no default]
; Input blobs will be read from ""
; Output blobs will be dumped to "__.execute.out"
; Messages will be dumped to "__.execute.msg"
; Diagnostics will be written to "__.execute.log"
;execute_requests = [ ...]

; NetSchedule client configuration
;
[netschedule_api]
client_name = node_sample

; Specifies NetSchedule service. Format: lb_service_name or host:port
service = NS_test

; Name of the queue (DO NOT use default queue for your application)
queue_name = sample

; Instructs the client to try to fit its output onto the netschedule's
; internal storage (its size is usually about 2Kb). If the data does not fit there
; it will be saved into an external blob storage (netcache).
; Default value is true
;use_embedded_storage = true

; Time interval in seconds when client is automatically rebalanced
rebalance_time = 10

; Number of requests before rebalancing
rebalance_requests = 100

; Sets a communication timeout (in seconds) between a client and a server
; Default is 12 secs
;communication_timeout = 20

; Network storage (NetCache) configuration
;
[netcache_api]
client_name = node_sample

; LB service name
service = NC_test

; If this parameter is set to true then before the job is processed the input data blob
; will be stored into a file on the local file system and the job will get a stream to
; that file.
; Default is false
cache_input = false

; If this parameter is set to true then the job will get a stream to file on the local
; file system as an output stream. After job is done the content of this file will be
; stored to a blob in the storage.
; Default is false
cache_output = false

; If 'cache_input' or 'cache_output' are set to true, sets a temporary directory where
; cached input/output is stored.
; Default: .
;tmp_dir = /tmp

; Synonym for tmp_dir. If both are set, tmp_dir is used.
;tmp_path = /tmp

; Time interval in seconds when client is automatically rebalanced
rebalance_time = 10

; Number of requests before rebalancing
rebalance_requests = 100

; Sets a communication timeout (in seconds) between a client and a server
; Default is 12 secs
;communication_timeout = 20

; Sets a communication protocol. Supported protocols are 'simple' and 'persistent'
; 'simple' protocol establishes a new connection to the NetCache server for each request.
; 'persistent' protocol supports a permanent connection to the server (requires NetCache
; server version 2.5.9 and higher)
; Default is 'simple'
;protocol = persistent

; If this parameter is set to true, blob key contains service name and
; server (listed in the blob key) is not present in that service,
; then do not try to connect to that server.
; If set to 'auto' and key has a "Check-Server" hint set to NO,
; then assume 'server_check = no'; otherwise, assume 'server_check = yes'.
; Otherwise, unconditionally try to connect to the server.
; Default is 'auto'
;server_check = auto

; This is a hint for the blob readers that use 'server_check = auto'.
; If set to true, blob readers are advised to pre-check the server which is listed in the blob key.
; Default is true
;server_check_hint = true