@@ -21,17 +21,21 @@ cluster_name: 'Test Cluster'
#
# If you already have a cluster with 1 token per node, and wish to migrate to
# multiple tokens per node, see http://wiki.apache.org/cassandra/Operations
-num_tokens: 256
+# num_tokens: 256

-# initial_token allows you to specify tokens manually. While you can use # it with
-# vnodes (num_tokens > 1, above) -- in which case you should provide a
-# comma-separated list -- it's primarily used when adding nodes # to legacy clusters
-# that do not have vnodes enabled.
-# initial_token:
+# If you haven't specified num_tokens, or have set it to the default of 1, then
+# you should always specify InitialToken when setting up a production
+# cluster for the first time, and often when adding capacity later.
+# The principle is that each node should be given an equal slice of
+# the token ring; see http://wiki.apache.org/cassandra/Operations
+# for more details.
+#
+# If blank, Cassandra will request a token bisecting the range of
+# the heaviest-loaded existing node. If there is no load information
+# available, such as is the case with a new cluster, it will pick
+# a random token, which will lead to hot spots.
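+#
+# For illustration only (an assumed example, not a required setting): with
+# the Murmur3Partitioner configured below, a three-node cluster could be
+# given an evenly divided ring by setting, in each node's own cassandra.yaml:
+#   node 1: initial_token: -9223372036854775808
+#   node 2: initial_token: -3074457345618258603
+#   node 3: initial_token: 3074457345618258602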
+initial_token:

-# May either be "true" or "false" to enable globally, or contain a list
-# of data centers to enable per-datacenter.
-# hinted_handoff_enabled: DC1,DC2
# See http://wiki.apache.org/cassandra/HintedHandoff
hinted_handoff_enabled: true
# this defines the maximum amount of time a dead host will have hints
@@ -78,16 +82,27 @@ authorizer: AllowAllAuthorizer
# Will be disabled automatically for AllowAllAuthorizer.
permissions_validity_in_ms: 2000

-# The partitioner is responsible for distributing groups of rows (by
-# partition key) across nodes in the cluster. You should leave this
-# alone for new clusters. The partitioner can NOT be changed without
-# reloading all data, so when upgrading you should set this to the
-# same partitioner you were already using.
-#
-# Besides Murmur3Partitioner, partitioners included for backwards
-# compatibility include RandomPartitioner, ByteOrderedPartitioner, and
-# OrderPreservingPartitioner.
-#
+# The partitioner is responsible for distributing rows (by key) across
+# nodes in the cluster. Any IPartitioner may be used, including your
+# own as long as it is on the classpath. Out of the box, Cassandra
+# provides org.apache.cassandra.dht.{Murmur3Partitioner, RandomPartitioner,
+# ByteOrderedPartitioner, OrderPreservingPartitioner (deprecated)}.
+#
+# - RandomPartitioner distributes rows across the cluster evenly by md5.
+#   This is the default prior to 1.2 and is retained for compatibility.
+# - Murmur3Partitioner is similar to RandomPartitioner but uses the Murmur3_128
+#   hash function instead of md5. When in doubt, this is the best option.
+# - ByteOrderedPartitioner orders rows lexically by key bytes. BOP allows
+#   scanning rows in key order, but the ordering can generate hot spots
+#   for sequential insertion workloads.
+# - OrderPreservingPartitioner is an obsolete form of BOP that stores
+#   keys in a less-efficient format and only works with keys that are
+#   UTF8-encoded Strings.
+# - CollatingOPP collates according to EN,US rules rather than lexical byte
+#   ordering. Use this as an example if you need custom collation.
+#
+# See http://wiki.apache.org/cassandra/Operations for more on
+# partitioners and token selection.
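+#
+# For example (illustrative only): a cluster created before 1.2 that already
+# stores data under RandomPartitioner cannot switch partitioners without
+# reloading all data, so it would keep its original setting rather than the
+# Murmur3Partitioner default below:
+# partitioner: org.apache.cassandra.dht.RandomPartitioner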
partitioner: org.apache.cassandra.dht.Murmur3Partitioner

# Directories where Cassandra should store data on disk. Cassandra
@@ -100,7 +115,6 @@ data_file_directories:
commitlog_directory: /var/lib/cassandra/commitlog

# policy for data disk failures:
-# stop_paranoid: shut down gossip and Thrift even for single-sstable errors.
# stop: shut down gossip and Thrift, leaving the node effectively dead, but
# can still be inspected via JMX.
# best_effort: stop using the failed disk and respond to requests based on
@@ -109,14 +123,6 @@ commitlog_directory: /var/lib/cassandra/commitlog
# ignore: ignore fatal errors and let requests fail, as in pre-1.2 Cassandra
disk_failure_policy: stop

-# policy for commit disk failures:
-# stop: shut down gossip and Thrift, leaving the node effectively dead, but
-# can still be inspected via JMX.
-# stop_commit: shutdown the commit log, letting writes collect but
-# continuing to service reads, as in pre-2.0.5 Cassandra
-# ignore: ignore fatal errors and let the batches fail
-commit_failure_policy: stop
-
# Maximum size of the key cache in memory.
#
# Each key cache hit saves 1 seek and each row cache hit saves 2 seeks at the
@@ -167,18 +173,6 @@ row_cache_save_period: 0
# Disabled by default, meaning all keys are going to be saved
# row_cache_keys_to_save: 100

-# The off-heap memory allocator. Affects storage engine metadata as
-# well as caches. Experiments show that JEMAlloc saves some memory
-# than the native GCC allocator (i.e., JEMalloc is more
-# fragmentation-resistant).
-#
-# Supported values are: NativeAllocator, JEMallocAllocator
-#
-# If you intend to use JEMallocAllocator you have to install JEMalloc as library and
-# modify cassandra-env.sh as directed in the file.
-#
-# Defaults to NativeAllocator
-# memory_allocator: NativeAllocator

# saved caches
saved_caches_directory: /var/lib/cassandra/saved_caches
@@ -226,6 +220,14 @@ seed_provider:
# Ex: "<ip1>,<ip2>,<ip3>"
- seeds: "127.0.0.1"

+# emergency pressure valve #2: the first time heap usage after a full
+# (CMS) garbage collection is above this fraction of the max,
+# Cassandra will reduce cache maximum _capacity_ to the given fraction
+# of the current _size_. Should usually be set substantially above
+# flush_largest_memtables_at, since that will have less long-term
+# impact on the system.
+#
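+# A sketch of how this valve could be configured (the option names and values
+# below are assumed 1.2-era defaults, not taken from this change; 1.0 disables
+# the valve):
+# reduce_cache_sizes_at: 0.85
+# reduce_cache_capacity_to: 0.6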
+
# For workloads with more data than can fit in memory, Cassandra's
# bottleneck will be reads that need to fetch data from
# disk. "concurrent_reads" should be set to (16 * number_of_drives) in
@@ -238,13 +240,9 @@ seed_provider:
concurrent_reads: 32
concurrent_writes: 32

-# Total memory to use for sstable-reading buffers. Defaults to
-# the smaller of 1/4 of heap or 512MB.
-# file_cache_size_in_mb: 512
-
# Total memory to use for memtables. Cassandra will flush the largest
# memtable when this much memory is used.
-# If omitted, Cassandra will set it to 1/4 of the heap.
+# If omitted, Cassandra will set it to 1/3 of the heap.
# memtable_total_space_in_mb: 2048

# Total space to use for commitlogs. Since commitlog segments are
@@ -310,15 +308,13 @@ listen_address: localhost
start_native_transport: true
# port for the CQL native transport to listen for clients on
native_transport_port: 9042
-# The maximum threads for handling requests when the native transport is used.
-# This is similar to rpc_max_threads though the default differs slightly (and
-# there is no native_transport_min_threads, idle threads will always be stopped
-# after 30 seconds).
+# The minimum and maximum threads for handling requests when the native
+# transport is used. They are similar to rpc_min_threads and rpc_max_threads,
+# though the defaults differ slightly.
+# NOTE: native_transport_min_threads is now deprecated and ignored (but kept
+# in the 1.2.x series for compatibility's sake).
+# native_transport_min_threads: 16
# native_transport_max_threads: 128
-#
-# The maximum size of allowed frame. Frame (requests) larger than this will
-# be rejected as invalid. The default is 256MB.
-# native_transport_max_frame_size_in_mb: 256

# Whether to start the thrift rpc server.
start_rpc: true
@@ -336,10 +332,10 @@ rpc_address: localhost
# port for Thrift to listen for clients on
rpc_port: 9160

-# enable or disable keepalive on rpc connections
+# enable or disable keepalive on rpc/native connections
rpc_keepalive: true

-# Cassandra provides two out-of-the-box options for the RPC Server:
+# Cassandra provides three out-of-the-box options for the RPC Server:
#
# sync -> One thread per thrift connection. For a very large number of clients, memory
# will be your limiting factor. On a 64 bit JVM, 180KB is the minimum stack size
@@ -408,18 +404,6 @@ snapshot_before_compaction: false
# lose data on truncation or drop.
auto_snapshot: true

-# When executing a scan, within or across a partition, we need to keep the
-# tombstones seen in memory so we can return them to the coordinator, which
-# will use them to make sure other replicas also know about the deleted rows.
-# With workloads that generate a lot of tombstones, this can cause performance
-# problems and even exaust the server heap.
-# (http://www.datastax.com/dev/blog/cassandra-anti-patterns-queues-and-queue-like-datasets)
-# Adjust the thresholds here if you understand the dangers and want to
-# scan more tombstones anyway. These thresholds may also be adjusted at runtime
-# using the StorageService mbean.
-tombstone_warn_threshold: 1000
-tombstone_failure_threshold: 100000
-
# Add column indexes to a row after its contents reach this size.
# Increase if your column values are large, or if you have a very large
# number of columns. The competing causes are, Cassandra has to
@@ -429,11 +413,6 @@ tombstone_failure_threshold: 100000
# that wastefully either.
column_index_size_in_kb: 64

-
-# Log WARN on any batch size exceeding this value. 5kb per batch by default.
-# Caution should be taken on increasing the size of this threshold as it can lead to node instability.
-batch_size_warn_threshold_in_kb: 5
-
# Size limit for rows being compacted in memory. Larger rows will spill
# over to disk and use a slower two-pass compaction process. A message
# will be logged specifying the row key.
@@ -480,14 +459,11 @@ compaction_preheat_key_cache: true
# stream_throughput_outbound_megabits_per_sec: 200

# How long the coordinator should wait for read operations to complete
-read_request_timeout_in_ms: 5000
+read_request_timeout_in_ms: 10000
# How long the coordinator should wait for seq or index scans to complete
range_request_timeout_in_ms: 10000
# How long the coordinator should wait for writes to complete
-write_request_timeout_in_ms: 2000
-# How long a coordinator should continue to retry a CAS operation
-# that contends with other proposals for the same row
-cas_contention_timeout_in_ms: 1000
+write_request_timeout_in_ms: 10000
# How long the coordinator should wait for truncates to complete
# (This can be much longer, because unless auto_snapshot is disabled
# we need to flush first so we can snapshot before removing the data.)
@@ -496,10 +472,8 @@ truncate_request_timeout_in_ms: 60000
request_timeout_in_ms: 10000

# Enable operation timeout information exchange between nodes to accurately
-# measure request timeouts. If disabled, replicas will assume that requests
-# were forwarded to them instantly by the coordinator, which means that
-# under overload conditions we will waste that much extra time processing
-# already-timed-out requests.
+# measure request timeouts. If disabled, Cassandra will assume the request
+# was forwarded to the replica instantly by the coordinator.
#
# Warning: before enabling this property make sure to ntp is installed
# and the times are synchronized between the nodes.
@@ -532,18 +506,23 @@ cross_node_timeout: false
#
# Out of the box, Cassandra provides
# - SimpleSnitch:
-#    Treats Strategy order as proximity. This can improve cache
-#    locality when disabling read repair. Only appropriate for
-#    single-datacenter deployments.
-# - GossipingPropertyFileSnitch
-#    This should be your go-to snitch for production use. The rack
-#    and datacenter for the local node are defined in
-#    cassandra-rackdc.properties and propagated to other nodes via
-#    gossip. If cassandra-topology.properties exists, it is used as a
-#    fallback, allowing migration from the PropertyFileSnitch.
+#    Treats Strategy order as proximity. This improves cache locality
+#    when disabling read repair, which can further improve throughput.
+#    Only appropriate for single-datacenter deployments.
# - PropertyFileSnitch:
#    Proximity is determined by rack and data center, which are
#    explicitly configured in cassandra-topology.properties.
+# - GossipingPropertyFileSnitch
+#    The rack and datacenter for the local node are defined in
+#    cassandra-rackdc.properties and propagated to other nodes via gossip. If
+#    cassandra-topology.properties exists, it is used as a fallback, allowing
+#    migration from the PropertyFileSnitch (see the illustrative
+#    cassandra-rackdc.properties sketch after this snitch list).
+# - RackInferringSnitch:
+#    Proximity is determined by rack and data center, which are
+#    assumed to correspond to the 3rd and 2nd octet of each node's
+#    IP address, respectively. Unless this happens to match your
+#    deployment conventions (as it did Facebook's), this is best used
+#    as an example of writing a custom Snitch class.
# - Ec2Snitch:
#    Appropriate for EC2 deployments in a single Region. Loads Region
#    and Availability Zone information from the EC2 API. The Region is
@@ -557,12 +536,6 @@ cross_node_timeout: false
#    ssl_storage_port on the public IP firewall. (For intra-Region
#    traffic, Cassandra will switch to the private IP after
#    establishing a connection.)
-# - RackInferringSnitch:
-#    Proximity is determined by rack and data center, which are
-#    assumed to correspond to the 3rd and 2nd octet of each node's IP
-#    address, respectively. Unless this happens to match your
-#    deployment conventions, this is best used as an example of
-#    writing a custom Snitch class and is provided in that spirit.
#
# You can use a custom Snitch by setting this to the full class name
# of the snitch, which will be assumed to be on your classpath.
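+#
+# Illustrative sketch for the GossipingPropertyFileSnitch described above:
+# each node would carry its own cassandra-rackdc.properties, for example
+# (the datacenter and rack names here are assumptions, not part of this file):
+#   dc=DC1
+#   rack=RAC1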
@@ -623,6 +596,18 @@ request_scheduler: org.apache.cassandra.scheduler.NoScheduler
# the request scheduling. Currently the only valid option is keyspace.
# request_scheduler_id: keyspace

+# index_interval controls the sampling of entries from the primary
+# row index in terms of space versus time. The larger the interval,
+# the smaller and less effective the sampling will be. In technical
+# terms, the interval corresponds to the number of index entries that
+# are skipped between taking each sample. All the sampled entries
+# must fit in memory. Generally, a value between 128 and 512 here
+# coupled with a large key cache size on CFs results in the best trade
+# offs. This value is not often changed; however, if you have many
+# very small rows (many to an OS page), then increasing this will
+# often lower memory usage without an impact on performance.
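+#
+# Rough illustration (assumed numbers, for intuition only): at the default
+# interval of 128, a node holding about 100 million row keys samples roughly
+# 100,000,000 / 128 ~= 780,000 primary index entries into memory.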
+index_interval: 128
+
# Enable or disable inter-node encryption
# Default settings are TLS v1, RSA 1024-bit keys (it is imperative that
# users generate their own keys) TLS_RSA_WITH_AES_128_CBC_SHA as the cipher
@@ -664,7 +649,6 @@ client_encryption_options:
# protocol: TLS
# algorithm: SunX509
# store_type: JKS
- # cipher_suites: [TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA,TLS_DHE_RSA_WITH_AES_128_CBC_SHA,TLS_DHE_RSA_WITH_AES_256_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA]

# internode_compression controls whether traffic between nodes is
# compressed.
@@ -677,10 +661,4 @@ internode_compression: all
# Disabling it will result in larger (but fewer) network packets being sent,
# reducing overhead from the TCP protocol itself, at the cost of increasing
# latency if you block for cross-datacenter responses.
-inter_dc_tcp_nodelay: false
-
-# Enable or disable kernel page cache preheating from contents of the key cache after compaction.
-# When enabled it would preheat only first "page" (4KB) of each row to optimize
-# for sequential access. Note: This could be harmful for fat rows, see CASSANDRA-4937
-# for further details on that topic.
-preheat_kernel_page_cache: false
+inter_dc_tcp_nodelay: true