Skip to content

Commit

Permalink
Merge branch 'main' into upgrade-go
Browse files Browse the repository at this point in the history
  • Loading branch information
pierrehilbert authored Jul 18, 2023
2 parents e3548fc + b481143 commit 3daa61e
Show file tree
Hide file tree
Showing 87 changed files with 3,502 additions and 844 deletions.
17 changes: 12 additions & 5 deletions CHANGELOG.next.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,10 @@ https://github.com/elastic/beats/compare/v8.8.1\...main[Check the HEAD diff]
- Make sure k8s watchers are closed when closing k8s meta processor. {pull}35630[35630]
- Upgraded apache arrow library used in x-pack/libbeat/reader/parquet from v11 to v12.0.1 in order to fix cross-compilation issues {pull}35640[35640]
- Fix panic when MaxRetryInterval is specified, but RetryInterval is not {pull}35820[35820]


- Do not print context cancelled error message when running under agent {pull}36006[36006]
- Fix recovering from invalid output configuration when running under Elastic-Agent {pull}36016[36016]
- Improve StreamBuf append to improve performance when reading long lines from files. {pull}35928[35928]
- Eliminate cloning of event in deepUpdate {pull}35945[35945]

*Auditbeat*

Expand Down Expand Up @@ -144,6 +146,9 @@ https://github.com/elastic/beats/compare/v8.8.1\...main[Check the HEAD diff]
- Fix metric collection in GCPPubSub input. {pull}35773[35773]
- Fix end point deregistration in http_endpoint input. {issue}35899[35899] {pull}35903[35903]
- Fix duplicate ID panic in filestream metrics. {issue}35964[35964] {pull}35972[35972]
- Improve error reporting and fix IPv6 handling of TCP and UDP metric collection. {pull}35996[35996]
- Fix handling of NUL-terminated log lines in Fortinet Firewall module. {issue}36026[36026] {pull}36027[36027]
- Make redact field configuration recommended in CEL input and log warning if missing. {pull}36008[36008]

*Heartbeat*

Expand Down Expand Up @@ -342,6 +347,9 @@ automatic splitting at root level, if root level element is an array. {pull}3415
- Add device support for Azure AD entity analytics. {pull}35807[35807]
- Improve CEL input performance. {pull}35915[35915]
- Adding filename details from zip to response for httpjson {issue}33952[33952] {pull}34044[34044]
- Add `clean_session` configuration setting for MQTT input. {pull}35806[35806]
- Add fingerprint mode for the filestream scanner and new file identity based on it {issue}34419[34419] {pull}35734[35734]
- Add file system metadata to events ingested via filestream {issue}35801[35801] {pull}36065[36065]

*Auditbeat*
- Migration of system/package module storage from gob encoding to flatbuffer encoding in bolt db. {pull}34817[34817]
Expand Down Expand Up @@ -373,6 +381,8 @@ automatic splitting at root level, if root level element is an array. {pull}3415
- Add new parameter `include_linked_accounts` to enable/disable metrics collection from multiple linked AWS Accounts {pull}35648[35648]
- Migrate Azure Billing, Monitor, and Storage metricsets to the newer SDK. {pull}33585[33585]
- Add support for float64 values parsing for statsd metrics of counter type. {pull}35099[35099]
- Add kubernetes.deployment.status.* fields for Kubernetes module {pull}35999[35999]


*Osquerybeat*

Expand Down Expand Up @@ -434,6 +444,3 @@ automatic splitting at root level, if root level element is an array. {pull}3415


==== Known Issues



26 changes: 22 additions & 4 deletions Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,7 @@ def cloud(Map args = [:]) {
withCloudTestEnv(args) {
startCloudTestEnv(name: args.directory, dirs: args.dirs, withAWS: args.withAWS)
try {
targetWithoutNode(context: args.context, command: args.command, directory: args.directory, label: args.label, withModule: args.withModule, isMage: true, id: args.id)
targetWithoutNode(dirs: args.dirs, context: args.context, command: args.command, directory: args.directory, label: args.label, withModule: args.withModule, isMage: true, id: args.id)
} finally {
terraformCleanup(name: args.directory, dir: args.directory, withAWS: args.withAWS)
}
Expand Down Expand Up @@ -578,6 +578,7 @@ def target(Map args = [:]) {
* - mage then the dir(location) is required, aka by enabling isMage: true.
*/
def targetWithoutNode(Map args = [:]) {
def dirs = args.get('dirs',[])
def command = args.command
def context = args.context
def directory = args.get('directory', '')
Expand All @@ -590,9 +591,22 @@ def targetWithoutNode(Map args = [:]) {
def enableRetry = args.get('enableRetry', false)
def withGCP = args.get('withGCP', false)
def withNodejs = args.get('withNodejs', false)
String name = normalise(args.directory)
withGithubNotify(context: "${context}") {
withBeatsEnv(archive: true, withModule: withModule, directory: directory, id: args.id) {
dumpVariables()
// unstash terraform outputs in the same directory where the files were stashed
dirs?.each { folder ->
dir("${folder}") {
try {
unstash("terraform-${name}")
//unstash does not print verbose output , hence printing contents of the directory for logging purposes
sh "ls -la ${pwd()}"
} catch (error) {
echo "error unstashing: ${error}"
}
}
}
withTools(k8s: installK8s, gcp: withGCP, nodejs: withNodejs) {
// make commands use -C <folder> while mage commands require the dir(folder)
// let's support this scenario with the location variable.
Expand Down Expand Up @@ -932,10 +946,14 @@ def startCloudTestEnv(Map args = [:]) {
}
error('startCloudTestEnv: terraform apply failed.')
} finally {
// Archive terraform states in case manual cleanup is needed.
archiveArtifacts(allowEmptyArchive: true, artifacts: '**/terraform.tfstate')
dirs?.each { folder ->
// Archive terraform states in case manual cleanup is needed.
archiveArtifacts(allowEmptyArchive: true, artifacts: '**/terraform.tfstate')
dir("${folder}") {
stash(name: "terraform-${name}", allowEmpty: true, includes: '**/terraform.tfstate,**/.terraform/**,outputs*.yml')
}
}
}
stash(name: "terraform-${name}", allowEmpty: true, includes: '**/terraform.tfstate,**/.terraform/**')
}
}
}
Expand Down
4 changes: 2 additions & 2 deletions NOTICE.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13448,11 +13448,11 @@ Contents of probable licence file $GOMODCACHE/github.com/elastic/[email protected]

--------------------------------------------------------------------------------
Dependency : github.com/elastic/go-elasticsearch/v8
Version: v8.8.1
Version: v8.8.2
Licence type (autodetected): Apache-2.0
--------------------------------------------------------------------------------

Contents of probable licence file $GOMODCACHE/github.com/elastic/go-elasticsearch/[email protected].1/LICENSE:
Contents of probable licence file $GOMODCACHE/github.com/elastic/go-elasticsearch/[email protected].2/LICENSE:

Apache License
Version 2.0, January 2004
Expand Down
13 changes: 13 additions & 0 deletions filebeat/_meta/config/filebeat.inputs.reference.yml.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,19 @@ filebeat.inputs:
# original for harvesting but will report the symlink name as the source.
#prospector.scanner.symlinks: false

# If enabled, instead of relying on the device ID and inode values when comparing files,
# compare hashes of the given byte ranges in files. A file becomes an ingest target
# once its size reaches at least offset+length bytes (see below). Until then it's ignored.
#prospector.scanner.fingerprint.enabled: false

# If fingerprint mode is enabled, sets the offset from the beginning of the file
# for the byte range used for computing the fingerprint value.
#prospector.scanner.fingerprint.offset: 0

# If fingerprint mode is enabled, sets the length of the byte range used for
# computing the fingerprint value. Cannot be less than 64 bytes.
#prospector.scanner.fingerprint.length: 1024

### Parsers configuration

#### JSON configuration
Expand Down
9 changes: 4 additions & 5 deletions filebeat/docs/inputs/input-common-file-options.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,10 @@ certain criteria or time. Closing the harvester means closing the file handler.
If a file is updated after the harvester is closed, the file will be picked up
again after `scan_frequency` has elapsed. However, if the file is moved or
deleted while the harvester is closed, {beatname_uc} will not be able to pick up
the file again, and any data that the harvester hasn't read will be lost.
The `close_*` settings are applied synchronously when {beatname_uc} attempts
the file again, and any data that the harvester hasn't read will be lost.
The `close_*` settings are applied synchronously when {beatname_uc} attempts
to read from a file, meaning that if {beatname_uc} is in a blocked state
due to blocked output, full queue or other issue, a file that would
due to blocked output, full queue or other issue, a file that would
otherwise be closed remains open until {beatname_uc} once again attempts to read from the file.


Expand Down Expand Up @@ -240,7 +240,7 @@ that should be removed based on the `clean_inactive` setting. This happens
because {beatname_uc} doesn't remove the entries until it opens the registry
again to read a different file. If you are testing the `clean_inactive` setting,
make sure {beatname_uc} is configured to read from more than one file, or the
file state will never be removed from the registry.
file state will never be removed from the registry.

[float]
[id="{beatname_lc}-input-{type}-clean-removed"]
Expand Down Expand Up @@ -441,4 +441,3 @@ Set the location of the marker file the following way:
----
file_identity.inode_marker.path: /logs/.filebeat-marker
----

5 changes: 3 additions & 2 deletions filebeat/docs/inputs/input-common-udp-options.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,11 @@ The host and UDP port to listen on for event streams.
[id="{beatname_lc}-input-{type}-udp-read-buffer"]
==== `read_buffer`

The size of the read buffer on the UDP socket.
The size of the read buffer on the UDP socket. If not specified, the default
from the operating system will be used.

[float]
[id="{beatname_lc}-input-{type}-udp-timeout"]
==== `timeout`

The read and write timeout for socket operations.
The read and write timeout for socket operations. The default is `5m`.
67 changes: 67 additions & 0 deletions filebeat/docs/inputs/input-filestream-file-options.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,62 @@ stays open and constantly polls your files.

The default setting is 10s.

[float]
[id="{beatname_lc}-input-{type}-scan-fingerprint"]
===== `prospector.scanner.fingerprint`

Instead of relying on the device ID and inode values when comparing files, compare hashes of the given byte ranges of files.

Enable this option if you're experiencing data loss or data duplication due to unstable file identifiers provided by the file system.

Following are some scenarios where this can happen:

. Some file systems (e.g. in Docker) cache and re-use inodes
+
for example if you:
+
.. Create a file (`touch x`)
.. Check the file's inode (`ls -i x`)
.. Delete the file (`rm x`)
.. Create a new file right away (`touch y`)
.. Check the inode of the new file (`ls -i y`)
+

For both files you might see the same inode value, even though they have different filenames.
+
. Non-Ext file systems can change inodes:
+
Ext file systems store the inode number in the `i_ino` field, inside a struct `inode`, which is written to disk. In this case, if the file is the same (not another file with the same name) then the inode number is guaranteed to be the same.
+
If the file system is other than Ext, the inode number is generated by the inode operations defined by the file system driver. As they don't have the concept of what an inode is, they have to mimic all of the inode's internal fields to comply with VFS, so this number will probably be different after a reboot, even after closing and opening the file again (theoretically).
+
. Some file processing tools change inode values
+
Sometimes users unintentionally change inodes by using tools like `rsync` or `sed`.
+
. Some operating systems change device IDs after reboot
+
Depending on the mounting approach, the device ID (which is also used for comparing files) might change after a reboot.

**Configuration**

Fingerprint mode is disabled by default.

WARNING: Enabling fingerprint mode delays ingesting new files until they grow to at least `offset`+`length` bytes in size, so they can be fingerprinted. Until then these files are ignored.

Normally, log lines contain timestamps and other unique fields, which makes them suitable for fingerprint mode,
but in every use case users should inspect their logs to determine the appropriate values for
the `offset` and `length` parameters. Default `offset` is `0` and default `length` is `1024` or 1 KB. `length` cannot be less than `64`.

[source,yaml]
----
fingerprint:
enabled: false
offset: 0
length: 1024
----


[float]
[id="{beatname_lc}-input-{type}-ignore-older"]
===== `ignore_older`
Expand Down Expand Up @@ -502,6 +558,17 @@ Set the location of the marker file the following way:
file_identity.inode_marker.path: /logs/.filebeat-marker
----

*`fingerprint`*:: To identify files based on their content byte range.

WARNING: In order to use this file identity option, you must enable the <<{beatname_lc}-input-filestream-scan-fingerprint,fingerprint option in the scanner>>. Once this file identity is enabled, changing the fingerprint configuration (offset, length, or other settings) will lead to a global re-ingestion of all files that match the paths configuration of the input.

Please refer to the <<{beatname_lc}-input-filestream-scan-fingerprint,fingerprint configuration for details>>.

[source,yaml]
----
file_identity.fingerprint: ~
----

[[filestream-log-rotation-support]]
[float]
=== Log rotation
Expand Down
9 changes: 8 additions & 1 deletion filebeat/docs/inputs/input-filestream.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ device IDs. However, on network shares and cloud providers these
values might change during the lifetime of the file. If this happens
{beatname_uc} thinks that file is new and resends the whole content
of the file. To solve this problem you can configure `file_identity` option. Possible
values besides the default `inode_deviceid` are `path` and `inode_marker`.
values besides the default `inode_deviceid` are `path`, `inode_marker` and `fingerprint`.

WARNING: Changing `file_identity` methods between runs may result in
duplicated events in the output.
Expand All @@ -116,6 +116,13 @@ example oneliner generates a hidden marker file for the selected mountpoint `/lo
Please note that you should not use this option on Windows as file identifiers might be
more volatile.

Selecting `fingerprint` instructs {beatname_uc} to identify files based on their
content byte range.

WARNING: In order to use this file identity option, you must enable the <<{beatname_lc}-input-filestream-scan-fingerprint,fingerprint option in the scanner>>. Once this file identity is enabled, changing the fingerprint configuration (offset, length, etc.) will lead to a global re-ingestion of all files that match the paths configuration of the input.

Please refer to the <<{beatname_lc}-input-filestream-scan-fingerprint,fingerprint configuration for details>>.

["source","sh",subs="attributes"]
----
$ lsblk -o MOUNTPOINT,UUID | grep /logs | awk '{print $2}' >> /logs/.filebeat-marker
Expand Down
11 changes: 11 additions & 0 deletions filebeat/docs/inputs/input-mqtt.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,17 @@ A client username used for authentication provided on the application level by t

A client password used for authentication provided on the application level by the MQTT protocol.

===== `clean_session`

The `clean_session` flag indicates whether the client wants to establish a persistent session with the broker.
The default is `true`.

When `clean_session` is set to false, the session is considered to be persistent. The broker stores all subscriptions for
the client and all missed messages for the client that subscribed with a Quality of Service (QoS) level 1 or 2.

In contrast, when `clean_session` is set to true, the broker doesn’t retain any information for the client
and discards any previous state from any persistent session.

===== `ssl`

Configuration options for SSL parameters like the certificate, key and the certificate authorities
Expand Down
13 changes: 13 additions & 0 deletions filebeat/filebeat.reference.yml
Original file line number Diff line number Diff line change
Expand Up @@ -707,6 +707,19 @@ filebeat.inputs:
# original for harvesting but will report the symlink name as the source.
#prospector.scanner.symlinks: false

# If enabled, instead of relying on the device ID and inode values when comparing files,
# compare hashes of the given byte ranges in files. A file becomes an ingest target
# once its size reaches at least offset+length bytes (see below). Until then it's ignored.
#prospector.scanner.fingerprint.enabled: false

# If fingerprint mode is enabled, sets the offset from the beginning of the file
# for the byte range used for computing the fingerprint value.
#prospector.scanner.fingerprint.offset: 0

# If fingerprint mode is enabled, sets the length of the byte range used for
# computing the fingerprint value. Cannot be less than 64 bytes.
#prospector.scanner.fingerprint.length: 1024

### Parsers configuration

#### JSON configuration
Expand Down
1 change: 1 addition & 0 deletions filebeat/input/filestream/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ import (
type config struct {
Reader readerConfig `config:",inline"`

ID string `config:"id"`
Paths []string `config:"paths"`
Close closerConfig `config:"close"`
FileWatcher *conf.Namespace `config:"prospector"`
Expand Down
4 changes: 3 additions & 1 deletion filebeat/input/filestream/copytruncate_prospector.go
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,9 @@ func (p *copyTruncateFileProspector) onRotatedFile(
hg.Start(ctx, src)
return
}
originalSrc := p.identifier.GetSource(loginp.FSEvent{NewPath: originalPath, Info: fi})
descCopy := fe.Descriptor
descCopy.Info = fi
originalSrc := p.identifier.GetSource(loginp.FSEvent{NewPath: originalPath, Descriptor: descCopy})
p.rotatedFiles.addOriginalFile(originalPath, originalSrc)
p.rotatedFiles.addRotatedFile(originalPath, fe.NewPath, src)
hg.Start(ctx, src)
Expand Down
2 changes: 1 addition & 1 deletion filebeat/input/filestream/environment_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -374,7 +374,7 @@ func (e *inputTestingEnvironment) getRegistryState(key string) (registryEntry, e

func getIDFromPath(filepath, inputID string, fi os.FileInfo) string {
identifier, _ := newINodeDeviceIdentifier(nil)
src := identifier.GetSource(loginp.FSEvent{Info: fi, Op: loginp.OpCreate, NewPath: filepath})
src := identifier.GetSource(loginp.FSEvent{Descriptor: loginp.FileDescriptor{Info: fi}, Op: loginp.OpCreate, NewPath: filepath})
return "filestream::" + inputID + "::" + src.Name()
}

Expand Down
Loading

0 comments on commit 3daa61e

Please sign in to comment.