[remote_app]
; The application that is run by remote_app when it receives a job.
app_path="./job_processor.sh"

; A relative or absolute pathname of the application that must be periodically
; run when the node is in idle mode (no jobs are being executed).
;
;idle_app_cmd="./idle_task.sh"

; Maximum application run time in seconds.
; If the job runs longer than that, remote_app terminates it.
; Default: 0 (no time limit)
;
max_app_run_time=0

; Time slice in seconds by which job expiration is extended while the job is
; running.
;
; This parameter is zero by default, which means that job lifetime is not
; extended while the job is running. For long running jobs, it is recommended
; to set this parameter to a non-zero value.
;
keep_alive_period=60

; Specifies what to do with the job when the job-processing application
; terminates with a non-zero exit code.
;
; Available actions: done - the job will be marked as done
;                    fail - the job will be failed with an error message
;                    return - the job will be returned back to the queue
;
; Default action is 'done'
;
;non_zero_exit_action = fail

; Specifies whether a separate temporary directory must be created for each
; run of the job-processing application.
;
; When the application terminates, this directory is automatically deleted
; unless 'remove_tmp_dir' (see below) is set to false.
;
; Default is false.
;
run_in_separate_dir=true

; If 'run_in_separate_dir' is set to true, 'tmp_path' defines the location of
; the top-level temporary directory and, optionally, the format of the names
; of its per-job subdirectories.
;
; This parameter allows the following substitutions:
;   * %q - NetSchedule queue name
;   * %j - the ID of the job being processed
;   * %t - the current UNIX Epoch time in seconds
;   * %r - the current request number
;   * %% - substituted with a single '%' character
; If none of the substitutions is given, the following default
; combination is used: ${tmp_path}/%q_%j_%t
;
;tmp_path=/tmp

; Synonym for 'tmp_path'. If both are set, 'tmp_dir' is used.
;
;tmp_dir=/tmp

; If 'run_in_separate_dir' is set to true, this parameter specifies whether the
; temporary directory created for each job must be deleted after the job has
; finished.
;
; Default is true.
;
;remove_tmp_dir = false

; Specifies the number of subsequent asynchronous attempts to remove a
; temporary directory (all attempts, including the first, are made in a
; separate thread).
; Zero means a single immediate synchronous try (in the same thread).
;
; Default value is 60 attempts
;
;max_remove_tmp_attempts = 60

; Specifies how often (not earlier than) those removing attempts occur.
; Cannot be less than 1 second (will be adjusted to 1 second otherwise).
;
; Default value is every 60 seconds
;
;sleep_between_remove_tmp_attempts = 60

; If 'run_in_separate_dir' is set to true, specifies whether the standard
; output and standard error output must be cached in the working directory
; prior to sending them to NetCache.
;
; This prevents network timeouts for long-running jobs that produce
; a significant amount of data.
;
; Default is true.
;
;cache_std_out_err = false

; Specifies a job execution monitor (or watcher) application. This application
; can be used to check on the job-processing application or for progress
; reporting.
;
; The watcher runs periodically during the job-processing application
; execution (see 'monitor_period').
;
; The following parameters are passed to the monitor program:
;  -pid pid  - real application process id
;  -jid jid  - job id which the real application is processing.
;  -jwdir dir - the application's working directory
;
; The exit code of the monitor program is interpreted as follows:
;   0: The job is running as expected. The monitor's stdout is interpreted as
;      a job progress message.  The stderr goes to the log file if logging is
;      enabled.
;   1: The monitor detected an inconsistency with the job run; the job must be
;      returned back to the queue.  The monitor's stderr goes to the log file
;      regardless of whether logging is enabled or not.
;   2: The job must be failed. The monitor's stdout is interpreted as the
;      error message; stderr goes to the log file regardless of whether
;      logging is enabled or not.
;   3: There's a problem with the monitor application itself. The job
;      continues to run and the monitor's stderr goes to the log file
;      unconditionally.
;monitor_app_path=./monitor.sh

; Maximum monitor run time in seconds. If the monitor application does not
; finish within the specified time frame, it is terminated.
;
; The default is 5 seconds.
;
;max_monitor_running_time = 3

; Specifies how often the monitor application must be run.
;
; Default period is every 5 seconds.
;
;monitor_period = 10

; Wait time in seconds between the "soft" and the "hard" attempts to terminate
; the job processor.
;
; Default value is 1 second.
;
;kill_timeout = 5

; Specifies the number of attempts to be made to reap (wait for) a child
; process after that process failed to be killed.
;
; Default value is 60 attempts
;
;max_reap_attempts_after_kill = 60

; Specifies how often (not earlier than) those reaping attempts occur
;
; Default value is every 60 seconds
;
;sleep_between_reap_attempts = 60

; A command to request the version of the application that is run by remote_app
;
; Default value is equal to 'app_path' parameter value
;
;version_cmd = cat

; Space-separated command-line arguments to pass to 'version_cmd' command
;
; Default value is '-version'
;
;version_args = ./my_app_version.txt

; Specifies if application timeout is reported (in job's progress message).
; It's when an app does not finish in 'max_app_run_time' seconds (if set).
;
; Possible values:
;   smart - report only if there is no progress message already reported earlier
;   always - always report, even if it involves overriding
;           an existing progress message (reported earlier)
;   never - never report
;
; Default value is 'smart'.
;
;progress_message_on_timeout = smart

; Specifies what exit codes force jobs to fail with no retries.
;
; The parameter must be in the following format (in ascending order):
;     [!] R1, R2, ..., Rn
;
; Where:
;     !         - negation, makes all provided ranges be excluded
;                 (not included).
;     R1 ... Rn - integer closed ranges specified either as 
;                 FROM - TO (corresponds to [FROM, TO] range) or as
;                 NUMBER (corresponds to [NUMBER, NUMBER] range).
; Example:
;     4, 6 - 9, 16 - 40, 64
; 
;fail_no_retries_if_exit_code = ! 0 - 10

; Environment variables handling:
; 1. Take the local environment.
; 2. Add or override all entries from [env_set] section.
[env_set]
; LD_LIBRARY_PATH=.


; Generic worker node parameters
[server]

; Maximum number of jobs that can be served simultaneously (each job occupies
; one thread). For computationally intensive algorithms, this value should
; not exceed the number of CPU cores.
;
; Default value is 'auto', which means that the value is automatically
; detected based on the number of CPU cores available in the system.
;
;max_threads=auto

; Initial number of worker threads.
;
; Default number is 1.
;
;init_threads=1

; TCP port number for administrative access. This parameter can be specified
; as a range, in which case the worker node will choose the first available
; port from the given range.
;
control_port=9300

; Whether to enable verbose logging.
;
log=true

; Whether to log progress message changes generated by the monitor
; application. False by default.
;
;log_progress=false

; Internal.
; Lower values give better response to the shutdown command when all
; working threads are occupied.
;
thread_pool_timeout=5

; Time in seconds between two consecutive attempts to get a job for execution.
; In between these attempts, the worker node waits on its UDP port for a
; notification from the server. If UDP is blocked by the firewall, this value
; may need to be lowered.
;
; Default value is 30 seconds.
;
job_wait_timeout=10

; The maximum number of jobs the worker node is allowed to process before it
; must shut itself down.
; Restarting the node periodically is sometimes useful because of heap
; fragmentation, possible memory leaks, etc.
;
; The default value is 0, which means that worker node lifetime is not limited
; by the number of jobs it processes.
;
;max_total_jobs = 100

; The maximum number of jobs the worker node is allowed to fail before it
; must shut itself down.
;
; The default value is 0, which means that worker node lifetime is not limited
; by the number of jobs it fails.
;
;max_failed_jobs = 3

; Sets the maximum number of jobs with the same client IP address running in parallel.
; While this limit is reached for a client IP address, the worker node immediately returns
; all new jobs with the same client IP address back to the queue also blacklisting them.
; Blacklisted jobs will not be given again to the same worker node for some time (server configured).
;
; Default value is 0, which means that jobs per client IP address are not limited
;
;max_jobs_per_client_ip = 10

; Sets the maximum number of jobs with the same session ID running in parallel.
; While this limit is reached for a session ID, the worker node immediately returns
; all new jobs with the same session ID back to the queue also blacklisting them.
; Blacklisted jobs will not be given again to the same worker node for some time (server configured).
;
; Default value is 0, which means that jobs per session ID are not limited
;
;max_jobs_per_session_id = 10

; Sets the maximum limit for total memory consumption by this worker node.
; When this limit is reached, the worker node shuts down.
; The value can include a byte-multiple unit suffix: G (giga-), MB (mega-), KiB (kibi-), etc.
;
; Default value is 0, which means that memory consumption is not limited
;
;total_memory_limit = 1.5GB

; Sets the maximum limit for total runtime of this worker node (in seconds).
; When this limit is reached, the worker node shuts down.
;
; Default value is 0, which means that runtime is not limited
;
;total_time_limit = 3600

; Allows this worker node to detect infinite loops in job execution.
; If a job is being executed for more than the specified time,
; it is assumed to be stuck in an infinite loop.
; If this happens, the worker node enters shutdown mode:
; After all other running jobs are either done or also assumed stuck,
; the worker node shuts down (may end up killing itself).
;
; Default value is 0, which means that the node will not detect infinite loops.
;
;infinite_loop_time = 600

; (POSIX systems only) Whether the worker node must daemonize after startup.
;
;daemon=true

; The list of worker nodes that this node must check before attempting
; to retrieve a job from the NetSchedule queue. If at least one of these
; worker nodes has an idle thread, this node will not connect to the queue
; to get a job. This node and all nodes from the given list must be connected
; to the same NetSchedule service and the same queue.
;
;master_nodes = server1:9300, server2:9300

; List of network hosts from which administrative access to this worker node
; is allowed. Note that 'localhost' must be explicitly added to this list if
; needed.
;
; By default, administrative access from any host is granted.
;
;admin_hosts = localhost service1 service2 service3

; Timeout in seconds before the node enters idle mode and the idle task is
; executed. The node is considered idle when all jobs are done and there are
; no new jobs in the queue.
;
; Default value is 30 sec.
;
;idle_run_delay = 30

; Whether and when the worker node must shut itself down if it's idle.
; If the node is idle longer than the specified number of seconds, it will
; automatically shut down.
;
; Default: 0 (no automatic shutdown)
;
;auto_shutdown_if_idle = 0

; Specifies that the worker node must immediately exit as soon as
; it receives a shutdown request, without waiting for the worker
; threads to finish running their jobs.
;force_exit = false

; Specifies if the framework must reuse an instance of the job class.  Setting
; this parameter to true means that only one instance of the job class will be
; created for each execution thread. False means that a new instance of the
; job class will be created for each incoming job.
;
; Default value is false.
;
reuse_job_object = true

; Specifies how often the node must check the status of a running job.
; This parameter affects how soon the job-processing application will be
; terminated if the job is canceled.
;
check_status_period = 4

; Default timeout before the job is terminated in case of pullback.
; This value can be overridden by the '--timeout' option specified
; with 'grid_cli suspend --pullback'.
;
default_pullback_timeout = 0

; Specifies how often (in seconds) the worker node retries to commit a job after communication errors.
; The worker node gives up retrying either after it takes more than queue timeout (server configured)
; or if server successfully receives corresponding commit request but responds with an error.
;
; Minimum allowed value is 1 second, default is 2 seconds.
;
;commit_job_interval = 5

; If set to true, the worker node forks at start.
; Parent process is only used to clear the node on servers on exit and child process does everything else.
; Thus, the node is reliably cleared even if the child process crashes/is killed (for UNIX only).
;
; Default value is false (no forking).
;
;reliable_cleanup = true

; NetSchedule client configuration
;
[netschedule_api]

; Specifies how the node must introduce itself to the NetSchedule servers.
;
client_name=

; Specifies NetSchedule service. It can be either an LBSM service or a
; host:port address.
;
service=NS_test

; The name of the job queue, which can be either allocated by a Grid
; administrator or created dynamically using 'grid_cli createqueue'.
;
queue_name=

; How often to query LBSM (time in seconds).
;
rebalance_time=10

; How often to query LBSM (number of internal LBSM requests before
; rebalancing).
;
rebalance_requests=100

; Instructs the worker node to use the internal NetSchedule storage. The size
; of this internal storage is queue-specific, but usually is about 256Kb.
; If job output does not fit there, it will be saved to NetCache.
;
; Default value is false.
;
use_embedded_storage = true

; Use affinity information when requesting jobs.
;
; Default value is false.
;
use_affinities = false

; Initial set of preferred affinities, specified as a comma- or
; space-separated list.
; Example: job_type_a, job_type_b
;
; Default value is empty.
;
;affinity_list =

; A prioritized list of affinities, which overrides the default
; job processing order.
; Cannot be used with affinity_list.
;
; Example: high_priority_job, mid_priority_job, low_priority_job
;
; Default value is empty.
;
;affinity_ladder =

; Use affinity information and accept new affinities automatically.
; Cannot be used with affinity_ladder.
;
; Default value is false.
;
claim_new_affinities = false

; Allow the worker node to process jobs without affinities as well as
; jobs with "non-preferred" affinities.
; Cannot be used in combination with 'claim_new_affinities'.
;
; Default value is false.
;
process_any_job = false

; Communication timeout (in seconds) for connections to the NetSchedule
; servers.
;
; Default is 12 seconds.
;
;communication_timeout = 20

; Sets a communication timeout (in seconds) for accessing the first server
; in a service while submitting a job. If the first server does not reply
; within the specified amount of time, the next server will be tried, but
; the second and all subsequent servers will be given the full
; communication_timeout to reply. If LBSM services are not used or there's
; only one server in the service, this parameter does not apply.
;
; Default is [netschedule_api]/communication_timeout if defined,
; or 300ms (0.3s)
;
;first_server_timeout = 1


; Network storage (NetCache) configuration
;
[netcache_api]

; NetCache client identification -- must match [netschedule_api].client_name.
;
client_name=

; NetCache service to use. It can be either an LBSM service or a
; host:port address.
;
service=NC_test

; When true, job input data will be read from NetCache and stored as a file on
; a local file system before the job is run. This is to avoid possible
; connection timeouts when the job processor takes too much time to read its
; input.
;
; Default is false
;
cache_input = false

; When true, job output will be collected in a local file prior to sending it
; to NetCache when the job has finished. This is to avoid possible connection
; timeouts when the job is long running and produces bursts of data with long
; intervals in between.
;
; Default is false
;
cache_output = true

; If either 'cache_input' or 'cache_output' is enabled, this parameter
; defines a temporary directory where the cached input/output is stored.
;
; Default: .
;
;tmp_dir=/tmp

; Synonym for 'tmp_dir'. If both are set, 'tmp_dir' is used.
;
;tmp_path=/tmp

; How often to query LBSM (time interval in seconds).
;
rebalance_time=10

; How often to query LBSM (number of internal LBSM requests before
; rebalancing).
;
rebalance_requests=100

; Communication timeout (in seconds) for connections to the NetCache
; servers.
;
; Default is 12 seconds.
;
;communication_timeout = 20

; Sets a communication timeout (in seconds) for accessing the first server
; in a service while creating a blob or performing any other operation with
; a mirrored blob. If the first server does not reply within the specified
; amount of time, the next server will be tried, but the second and all
; subsequent servers will be given the full communication_timeout to reply.
; If LBSM services are not used or there's only one server in the service,
; this parameter does not apply.
;
; Default is [netcache_api]/communication_timeout if defined,
; or 300ms (0.3s)
;
;first_server_timeout = 1


; Throttling parameters. Described here for NetCache, but the same
; set of parameters can be defined in the [netschedule_api] section.

; If this many attempts to connect to a server fail in a row,
; then requests to the server will be throttled.
; Setting this parameter to zero disables this throttling criterion.
;  Default = 0
;
;throttle_by_subsequent_connection_failures = 3


; If at least "5" of the last "20" attempts to connect to a server did not
; succeed, then requests to the server will be throttled.
; Setting any of the parameters to zero disables this throttling criterion.
;  Default = 0/0
;
;throttle_by_connection_error_rate = 5/20


; Seconds to wait before the server can be accessed again (after requests to it
; have been throttled).
; Setting this parameter to zero or negative value will disable the throttling
; algorithm (and, all other "throttle_*" parameters will be ignored).
;  Default = 0
;
;throttle_relaxation_period = 12


; If connecting to a server (with all retries and delays) takes more than the
; specified amount of time, the connection will be failed.
; Setting this parameter to zero or negative value removes the restriction.
;  Default = 0.0
;
;max_connection_time = 23.4


; If the NC server pool is load-balanced, then do not unthrottle server
; unless (and until) it is present in the LBSM service table.
;  Default = false
;
;throttle_hold_until_active_in_lb = true