Skip to content

k8s troubleshooting error scale

scale 扩容失败（原因不明），容器无法启动

Problem
[root@k8s-master ~]# kubectl get rs,po
NAME                                      DESIRED   CURRENT   READY   AGE
replicaset.apps/nginx-deploy-548d7bc8bc   1         1         0       16m
replicaset.apps/nginx-deploy-5f44797d7f   1         1         0       13m
replicaset.apps/nginx-deploy-674dcdf579   2         2         2       17h
replicaset.apps/nginx-deploy-f7f5656c7    0         0         0       17h

NAME                                READY   STATUS              RESTARTS   AGE
pod/my-nginx                        1/1     Running             0          19h
pod/nginx-deploy-548d7bc8bc-pfhmn   0/1     ContainerCreating   0          16m
pod/nginx-deploy-5f44797d7f-jtmsd   0/1     ContainerCreating   0          13m
pod/nginx-deploy-674dcdf579-5gvbw   1/1     Running             0          17h
pod/nginx-deploy-674dcdf579-l79w8   1/1     Running             0          17h
pod/nginx-deploy-674dcdf579-t979q   0/1     Terminating         0          26m
[root@k8s-master ~]# kubectl edit deploy nginx-deploy
Pod_describe-nginx-deploy-548d7bc8bc-pfhmn
[root@k8s-master ~]# kubectl describe pod/nginx-deploy-548d7bc8bc-pfhmn
Name:           nginx-deploy-548d7bc8bc-pfhmn
Namespace:      default
Priority:       0
Node:           k8s-node1/10.0.0.101
Start Time:     Thu, 25 Apr 2024 22:10:19 -0400
Labels:         app=nginx-deploy
                pod-template-hash=548d7bc8bc
Annotations:    <none>
Status:         Pending
IP:             
IPs:            <none>
Controlled By:  ReplicaSet/nginx-deploy-548d7bc8bc
Containers:
nginx:
    Container ID:   
    Image:          nginx:1.25.5
    Image ID:       
    Port:           <none>
    Host Port:      <none>
    State:          Waiting
    Reason:       ContainerCreating
    Ready:          False
    Restart Count:  0
    Requests:
    cpu:        100m
    memory:     128Mi
    Environment:  <none>
    Mounts:
    /var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-v5qt4 (ro)
Conditions:
Type              Status
Initialized       True 
Ready             False 
ContainersReady   False 
PodScheduled      True 
Volumes:
kube-api-access-v5qt4:
    Type:                    Projected (a volume that contains injected data from multiple sources)
    TokenExpirationSeconds:  3607
    ConfigMapName:           kube-root-ca.crt
    ConfigMapOptional:       <nil>
    DownwardAPI:             true
QoS Class:                   Burstable
Node-Selectors:              <none>
Tolerations:                 node.kubernetes.io/not-ready:NoExecute op=Exists for 300s
                            node.kubernetes.io/unreachable:NoExecute op=Exists for 300s
Events:
Type     Reason                  Age                   From               Message
----     ------                  ----                  ----               -------
Normal   Scheduled               24m                   default-scheduler  Successfully assigned default/nginx-deploy-548d7bc8bc-pfhmn to k8s-node1
Warning  FailedCreatePodSandBox  24m                   kubelet            Failed to create pod sandbox: rpc error: code = Unknown desc = [failed to set up sandbox container "d0c52072346d605a7594b91391ccd8ceb7803a629412e165a0e49f8b1fb498ba" network for pod "nginx-deploy-548d7bc8bc-pfhmn": networkPlugin cni failed to set up pod "nginx-deploy-548d7bc8bc-pfhmn_default" network: error getting ClusterInformation: connection is unauthorized: Unauthorized, failed to clean up sandbox container "d0c52072346d605a7594b91391ccd8ceb7803a629412e165a0e49f8b1fb498ba" network for pod "nginx-deploy-548d7bc8bc-pfhmn": networkPlugin cni failed to teardown pod "nginx-deploy-548d7bc8bc-pfhmn_default" network: error getting ClusterInformation: connection is unauthorized: Unauthorized]
Normal   SandboxChanged          3m58s (x97 over 24m)  kubelet            Pod sandbox changed, it will be killed and re-created.
Deployment_yaml_file
apiVersion: apps/v1
kind: Deployment
metadata:
  annotations:
    deployment.kubernetes.io/revision: "7"
  creationTimestamp: "2024-04-25T07:44:53Z"
  generation: 9
  labels:
    app: nginx-deploy
  name: nginx-deploy
  namespace: default
  resourceVersion: "77441"
  uid: 02e581e3-6bbd-4af9-9d3a-9eda5b1c790b
spec:
  progressDeadlineSeconds: 600
  replicas: 3
  revisionHistoryLimit: 10
  selector:
    matchLabels:
      app: nginx-deploy
  strategy:
    rollingUpdate:
      maxSurge: 25%
      maxUnavailable: 25%
    type: RollingUpdate
  template:
    metadata:
      creationTimestamp: null
      labels:
        app: nginx-deploy
    spec:
      containers:
      - image: nginx:1.25.5
        imagePullPolicy: IfNotPresent
        name: nginx
        resources:
          requests:
            cpu: 200m
            memory: 256Mi
        terminationMessagePath: /dev/termination-log
        terminationMessagePolicy: File
      dnsPolicy: ClusterFirst
      restartPolicy: Always
      schedulerName: default-scheduler
      securityContext: {}
      terminationGracePeriodSeconds: 30
status:
  availableReplicas: 2
  conditions:
  - lastTransitionTime: "2024-04-26T02:00:13Z"
    lastUpdateTime: "2024-04-26T02:00:13Z"
    message: Deployment does not have minimum availability.
    reason: MinimumReplicasUnavailable
    status: "False"
    type: Available
  - lastTransitionTime: "2024-04-26T02:22:40Z"
    lastUpdateTime: "2024-04-26T02:22:40Z"
    message: ReplicaSet "nginx-deploy-5f44797d7f" has timed out progressing.
    reason: ProgressDeadlineExceeded
    status: "False"
    type: Progressing
  observedGeneration: 9
  readyReplicas: 2
  replicas: 4
  unavailableReplicas: 2
  updatedReplicas: 1

参考这个issue

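从上面的 Events 可以看到，CNI 网络插件在获取 ClusterInformation 时认证失败（connection is unauthorized: Unauthorized），导致 Pod 沙箱无法创建，容器一直停留在 ContainerCreating 状态。最简单的解决办法是重启 Master 节点。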