Open
Description
We are using EFS on both Pods and Nodes (AL2) within AWS EKS (v1.29). Within the Pods, we access EFS directories through the EFS CSI plugin (v1.5.8), while on the Nodes we install amazon-efs-utils (v2.1.0) and modify fstab to mount the directories. TLS is enabled for both the Pod and the Node mounts. Using the EFS CSI plugin alone does not present any issues; however, when EFS is also mounted directly on the Nodes, access is initially normal, but after approximately a dozen hours directory access errors begin to occur:
ls: cannot access /data/log/xxx: Permission denied
dmesg shows errors:
[Fri Jan 17 01:55:24 2025] NFS: state manager: check lease failed on NFSv4 server 127.0.0.1 with error 13
There are also some stunnel5 and efs-proxy processes running:
# ps -ef | grep stunnel
root 26991 3781 0 Jan16 ? 00:00:00 /usr/bin/stunnel5 /var/run/efs/stunnel-config.fs-059735545feda3989.var.lib.kubelet.pods.37f75393-a9db-4513-b474-28c2235b6ddc.volumes.kubernetes.io~csi.pv-efs-data-log.mount.20117
root 82623 1 0 Jan16 ? 00:04:17 /sbin/efs-proxy /var/run/efs/stunnel-config.fs-059735545feda3989.data.log.xxx.20584 --tls
root 95784 1 0 Jan16 ? 00:00:00 /sbin/efs-proxy /var/run/efs/stunnel-config.fs-059735545feda3989.mnt.20538
root 95897 1 0 Jan16 ? 00:00:01 /sbin/efs-proxy /var/run/efs/stunnel-config.fs-059735545feda3989.mnt.20188 --tls
root 571198 540014 0 07:02 pts/1 00:00:00 grep --color=auto stunnel
but the stunnel-config files referenced by the efs-proxy processes are missing:
# ls -al /var/run/efs/
total 8
drwxr-xr-x 3 root root 100 Jan 16 14:46 .
drwxr-xr-x 30 root root 1000 Jan 17 06:55 ..
-rw-r--r-- 1 root root 1149 Jan 16 14:46 fs-059735545feda3989.var.lib.kubelet.pods.37f75393-a9db-4513-b474-28c2235b6ddc.volumes.kubernetes.io~csi.pv-efs-data-log.mount.20117
drwxr-xr-x 4 root root 160 Jan 16 08:01 fs-059735545feda3989.var.lib.kubelet.pods.37f75393-a9db-4513-b474-28c2235b6ddc.volumes.kubernetes.io~csi.pv-efs-data-log.mount.20117+
-rw-r--r-- 1 root root 793 Jan 16 08:01 stunnel-config.fs-059735545feda3989.var.lib.kubelet.pods.37f75393-a9db-4513-b474-28c2235b6ddc.volumes.kubernetes.io~csi.pv-efs-data-log.mount.20117
We found some errors in the amazon-efs-watchdog log:
2025-01-17 03:27:10 UTC - ERROR - Unable to parse json in /var/run/efs/fs-059735545feda3989.var.lib.kubelet.pods.37f75393-a9db-4513-b474-28c2235b6ddc.volumes.kubernetes.io~csi.pv-efs-data-log.mount.20117
Traceback (most recent call last):
File "/usr/bin/amazon-efs-mount-watchdog", line 1155, in check_efs_mounts
state = json.load(f)
File "/usr/lib64/python3.7/json/__init__.py", line 296, in load
parse_constant=parse_constant, object_pairs_hook=object_pairs_hook, **kw)
File "/usr/lib64/python3.7/json/__init__.py", line 348, in loads
return _default_decoder.decode(s)
File "/usr/lib64/python3.7/json/decoder.py", line 340, in decode
raise JSONDecodeError("Extra data", s, end)
json.decoder.JSONDecodeError: Extra data: line 1 column 1149 (char 1148)
The JSON in the mount state file is malformed (it ends with an extra `}`):
# cat fs-059735545feda3989.var.lib.kubelet.pods.37f75393-a9db-4513-b474-28c2235b6ddc.volumes.kubernetes.io~csi.pv-efs-data-log.mount.20117
{"cmd": ["/usr/bin/stunnel5", "/var/run/efs/stunnel-config.fs-059735545feda3989.var.lib.kubelet.pods.37f75393-a9db-4513-b474-28c2235b6ddc.volumes.kubernetes.io~csi.pv-efs-data-log.mount.20117"], "files": ["/var/run/efs/stunnel-config.fs-059735545feda3989.var.lib.kubelet.pods.37f75393-a9db-4513-b474-28c2235b6ddc.volumes.kubernetes.io~csi.pv-efs-data-log.mount.20117"], "mount_time": 1737014501.1665277, "mountpoint": "/var/lib/kubelet/pods/37f75393-a9db-4513-b474-28c2235b6ddc/volumes/kubernetes.io~csi/pv-efs-data-log/mount", "mountStateDir": "fs-059735545feda3989.var.lib.kubelet.pods.37f75393-a9db-4513-b474-28c2235b6ddc.volumes.kubernetes.io~csi.pv-efs-data-log.mount.20117+", "commonName": "ip-10-11-142-95.ap-east-1.compute.internal", "region": "ap-east-1", "certificateCreationTime": "250116140141Z", "certificate": "/var/run/efs/fs-059735545feda3989.var.lib.kubelet.pods.37f75393-a9db-4513-b474-28c2235b6ddc.volumes.kubernetes.io~csi.pv-efs-data-log.mount.20117+/certificate.pem", "privateKey": "/etc/amazon/efs/privateKey.pem", "fsId": "fs-059735545feda3989", "unmount_count": 0, "last_stunnel_check_time": 1737022007.49994, "pid": 60399}}
# cat fs-059735545feda3989.var.lib.kubelet.pods.37f75393-a9db-4513-b474-28c2235b6ddc.volumes.kubernetes.io~csi.pv-efs-data-log.mount.20117 | jq .
{
"cmd": [
"/usr/bin/stunnel5",
"/var/run/efs/stunnel-config.fs-059735545feda3989.var.lib.kubelet.pods.37f75393-a9db-4513-b474-28c2235b6ddc.volumes.kubernetes.io~csi.pv-efs-data-log.mount.20117"
],
"files": [
"/var/run/efs/stunnel-config.fs-059735545feda3989.var.lib.kubelet.pods.37f75393-a9db-4513-b474-28c2235b6ddc.volumes.kubernetes.io~csi.pv-efs-data-log.mount.20117"
],
"mount_time": 1737014501.1665277,
"mountpoint": "/var/lib/kubelet/pods/37f75393-a9db-4513-b474-28c2235b6ddc/volumes/kubernetes.io~csi/pv-efs-data-log/mount",
"mountStateDir": "fs-059735545feda3989.var.lib.kubelet.pods.37f75393-a9db-4513-b474-28c2235b6ddc.volumes.kubernetes.io~csi.pv-efs-data-log.mount.20117+",
"commonName": "ip-10-11-142-95.ap-east-1.compute.internal",
"region": "ap-east-1",
"certificateCreationTime": "250116140141Z",
"certificate": "/var/run/efs/fs-059735545feda3989.var.lib.kubelet.pods.37f75393-a9db-4513-b474-28c2235b6ddc.volumes.kubernetes.io~csi.pv-efs-data-log.mount.20117+/certificate.pem",
"privateKey": "/etc/amazon/efs/privateKey.pem",
"fsId": "fs-059735545feda3989",
"unmount_count": 0,
"last_stunnel_check_time": 1737022007.49994,
"pid": 60399
}
parse error: Unmatched '}' at line 1, column 1149
Metadata
Metadata
Assignees
Labels
No labels