diff --git a/packaging/pom.xml b/packaging/pom.xml index 46949bd66b7f..df4d33309e31 100644 --- a/packaging/pom.xml +++ b/packaging/pom.xml @@ -27,6 +27,9 @@ .. apache-hive-${project.version}-jdbc.jar + + src/kubernetes + dist @@ -272,6 +275,10 @@ + + kubernetes + + diff --git a/packaging/src/docker/entrypoint.sh b/packaging/src/docker/entrypoint.sh index 656bf63ec601..5ec094edce31 100644 --- a/packaging/src/docker/entrypoint.sh +++ b/packaging/src/docker/entrypoint.sh @@ -153,7 +153,7 @@ function run_tezam { exit 1 fi # service_plugins_descriptor.json references org.apache.hadoop.hive.llap.tezplugins.* (hive-llap-tez, etc.) - tezam_cp="${HADOOP_CONF_DIR}:${TEZ_CONF_DIR}:${TEZ_SNAPSHOT_HOME}/*:${TEZ_HOME}/*:${TEZ_HOME}/lib/*:${HIVE_HOME}/lib/*:${HADOOP_HOME}/share/hadoop/common/*:${HADOOP_HOME}/share/hadoop/common/lib/*:${HADOOP_HOME}/share/hadoop/yarn/*:${HADOOP_HOME}/share/hadoop/yarn/lib/*:${HADOOP_HOME}/share/hadoop/hdfs/*:${HADOOP_HOME}/share/hadoop/hdfs/lib/*:${HADOOP_HOME}/share/hadoop/mapreduce/*:${HADOOP_HOME}/share/hadoop/mapreduce/lib/*:${HADOOP_CLASSPATH:-}" + tezam_cp="${HADOOP_CONF_DIR}:${TEZ_CONF_DIR}:${TEZ_SNAPSHOT_HOME}/*:${TEZ_HOME}/*:${TEZ_HOME}/lib/*:${HIVE_HOME}/lib/*:$("${HADOOP_HOME}/bin/hadoop" classpath)" local java_bin local tezam_java_opts diff --git a/packaging/src/docker/start-hive.sh b/packaging/src/docker/start-hive.sh index 82a4c9952458..a76eea2cf31f 100755 --- a/packaging/src/docker/start-hive.sh +++ b/packaging/src/docker/start-hive.sh @@ -44,6 +44,7 @@ for arg in "$@"; do export S3_ENDPOINT_URL="http://s3.ozone:9878" export AWS_ACCESS_KEY_ID="ozone" export AWS_SECRET_ACCESS_KEY="secret" + export HADOOP_OPTIONAL_TOOLS="hadoop-aws" ;; *) echo "Unknown option: $arg" diff --git a/packaging/src/kubernetes/Dockerfile b/packaging/src/kubernetes/Dockerfile new file mode 100644 index 000000000000..9d688e67a7ac --- /dev/null +++ b/packaging/src/kubernetes/Dockerfile @@ -0,0 +1,26 @@ +# Licensed to the Apache Software Foundation (ASF) under 
one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to you under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +FROM eclipse-temurin:21-jre-ubi9-minimal + +ARG OPERATOR_JAR=target/hive-kubernetes-operator-*-shaded.jar + +WORKDIR /opt/hive-operator + +COPY ${OPERATOR_JAR} operator.jar + +USER 1000:1000 + +ENTRYPOINT ["java", "-jar", "operator.jar"] diff --git a/packaging/src/kubernetes/README.md b/packaging/src/kubernetes/README.md new file mode 100644 index 000000000000..fa87fa25da76 --- /dev/null +++ b/packaging/src/kubernetes/README.md @@ -0,0 +1,759 @@ + + +# Hive Kubernetes Operator + +A Java-based Kubernetes operator that manages Apache Hive clusters declaratively +using a single `HiveCluster` custom resource. Built with +[Java Operator SDK (JOSDK)](https://javaoperatorsdk.io/) and +[fabric8 Kubernetes client](https://github.com/fabric8io/kubernetes-client). 
+ +## Features + +- **Single CRD** (`HiveCluster`) manages all Hive components +- **Four Hive services**: Metastore, HiveServer2, LLAP, and Tez AM +- **Helm chart** with sensible defaults — provide DB + ZK + storage, get a full-HA cluster +- **Storage-agnostic**: works with any Hadoop-compatible filesystem (S3A, + ABFS, GCS, HDFS, Ozone) +- **Automatic dependency ordering**: schema init -> Metastore -> HiveServer2 -> LLAP/TezAM +- **Optional components**: LLAP and Tez AM enabled/disabled via flags +- **External Metastore**: point HiveServer2 at an existing Metastore +- **Status reporting**: per-component readiness tracked on the CRD status + +--- + +## Build from Source + +```bash +# Build the operator JAR + CRD + Helm chart (no Docker image) +mvn clean package -pl packaging/src/kubernetes -DskipTests + +# Build everything including the Docker image (includes the above) +mvn clean package -pl packaging/src/kubernetes -Pkubernetes -DskipTests +``` + +| Artifact | Path | +|----------|------| +| Shaded JAR | `target/hive-kubernetes-operator-*-shaded.jar` | +| CRD YAML | `helm/hive-operator/crds/hiveclusters.hive.apache.org-v1.yml` | +| Helm chart | `helm/hive-operator/` | +| Docker image | `apache/hive:operator-<version>` | + +--- + +## Quick Start (Helm) + +The Helm chart defaults to a **Full-HA** cluster (Metastore x2, HiveServer2 x2, +LLAP x2, TezAM x2). You only need to provide three things: database, ZooKeeper, +and storage.
+ +### Prerequisites + +- Kubernetes 1.25+ +- Helm 3.x +- A ZooKeeper instance (or install one below) +- A storage backend (Ozone, S3, ABFS, GCS, HDFS) +- A supported RDBMS for the Metastore (or install one below) + +### Step 1: Install Dependencies + +```bash +# ZooKeeper +helm repo add bitnami https://charts.bitnami.com/bitnami +helm install zookeeper bitnami/zookeeper \ + --set replicaCount=1 --set auth.enabled=false \ + --set image.repository=bitnamilegacy/zookeeper \ + --set image.tag=3.9.3-debian-12-r21 \ + --set global.security.allowInsecureImages=true --wait + +# PostgreSQL +helm install postgres bitnami/postgresql \ + --set auth.username=hive --set auth.password=hive123 \ + --set auth.database=metastore --wait + +# Create the DB password secret +kubectl create secret generic hive-db-secret --from-literal=password=hive123 +``` + +If using **Ozone** as the storage backend: + +```bash +helm repo add ozone https://apache.github.io/ozone-helm-charts/ +helm install ozone ozone/ozone --version 0.2.0 --wait +sleep 50 +kubectl exec statefulset/ozone-om -- ozone sh volume create /s3v +kubectl exec statefulset/ozone-om -- ozone sh bucket create /s3v/hive +``` + +### Step 2: Install the Hive Operator + Cluster + +Choose your storage backend from the examples below. Each shows the CLI command +and an equivalent values file. + +--- + +## Storage Backend Examples + +Each example below shows both the `helm install` CLI command and the equivalent +`values.yaml` file. Use whichever approach you prefer. 
+ +### Ozone (Full-HA, default behavior) + +**CLI:** + +```bash +helm install hive ./helm/hive-operator \ + --set cluster.database.type=postgres \ + --set cluster.database.url="jdbc:postgresql://postgres-postgresql:5432/metastore" \ + --set cluster.database.driver="org.postgresql.Driver" \ + --set cluster.database.username=hive \ + --set cluster.database.passwordSecretRef.name=hive-db-secret \ + --set cluster.database.passwordSecretRef.key=password \ + --set cluster.database.driverJarUrl="https://repo1.maven.org/maven2/org/postgresql/postgresql/42.7.5/postgresql-42.7.5.jar" \ + --set cluster.zookeeper.quorum="zookeeper:2181" \ + --set cluster.storage.coreSiteOverrides."fs\.defaultFS"="s3a://hive" \ + --set cluster.storage.coreSiteOverrides."fs\.s3a\.endpoint"="http://ozone-s3g-rest:9878" \ + --set-string cluster.storage.coreSiteOverrides."fs\.s3a\.path\.style\.access"=true \ + --set 'cluster.storage.envVars[0].name=HADOOP_OPTIONAL_TOOLS' \ + --set 'cluster.storage.envVars[0].value=hadoop-aws' \ + --set 'cluster.storage.envVars[1].name=AWS_ACCESS_KEY_ID' \ + --set 'cluster.storage.envVars[1].value=ozone' \ + --set 'cluster.storage.envVars[2].name=AWS_SECRET_ACCESS_KEY' \ + --set 'cluster.storage.envVars[2].value=ozone' +``` + +**Values file:** + +```yaml +# values.yaml +cluster: + database: + type: postgres + url: "jdbc:postgresql://postgres-postgresql:5432/metastore" + driver: "org.postgresql.Driver" + username: hive + passwordSecretRef: + name: hive-db-secret + key: password + driverJarUrl: "https://repo1.maven.org/maven2/org/postgresql/postgresql/42.7.5/postgresql-42.7.5.jar" + + zookeeper: + quorum: "zookeeper:2181" + + storage: + coreSiteOverrides: + fs.defaultFS: "s3a://hive" + fs.s3a.endpoint: "http://ozone-s3g-rest:9878" + fs.s3a.path.style.access: "true" + envVars: + - name: HADOOP_OPTIONAL_TOOLS + value: "hadoop-aws" + - name: AWS_ACCESS_KEY_ID + value: "ozone" + - name: AWS_SECRET_ACCESS_KEY + value: "ozone" +``` + +```bash +helm install hive 
./helm/hive-operator -f values.yaml +``` + +--- + +### AWS S3 + +**CLI:** + +Create the secret with your AWS credentials: +```bash +kubectl create secret generic aws-s3-creds \ + --from-literal=accessKey="<your-aws-access-key-id>" \ + --from-literal=secretKey="<your-aws-secret-access-key>" +``` + +Then install the operator and HiveCluster with the appropriate storage config: + +```bash +helm install hive ./helm/hive-operator \ + --set cluster.database.type=postgres \ + --set cluster.database.url="jdbc:postgresql://postgres-postgresql:5432/metastore" \ + --set cluster.database.driver="org.postgresql.Driver" \ + --set cluster.database.username=hive \ + --set cluster.database.passwordSecretRef.name=hive-db-secret \ + --set cluster.database.passwordSecretRef.key=password \ + --set cluster.database.driverJarUrl="https://repo1.maven.org/maven2/org/postgresql/postgresql/42.7.5/postgresql-42.7.5.jar" \ + --set cluster.zookeeper.quorum="zookeeper:2181" \ + --set cluster.storage.coreSiteOverrides."fs\.defaultFS"="s3a://hive-k8s-bucket" \ + --set 'cluster.storage.envVars[0].name=HADOOP_OPTIONAL_TOOLS' \ + --set 'cluster.storage.envVars[0].value=hadoop-aws' \ + --set 'cluster.storage.envVars[1].name=AWS_ACCESS_KEY_ID' \ + --set 'cluster.storage.envVars[1].valueFrom.secretKeyRef.name=aws-s3-creds' \ + --set 'cluster.storage.envVars[1].valueFrom.secretKeyRef.key=accessKey' \ + --set 'cluster.storage.envVars[2].name=AWS_SECRET_ACCESS_KEY' \ + --set 'cluster.storage.envVars[2].valueFrom.secretKeyRef.name=aws-s3-creds' \ + --set 'cluster.storage.envVars[2].valueFrom.secretKeyRef.key=secretKey' +``` + +**Values file:** + +```yaml +# values.yaml +cluster: + database: + type: postgres + url: "jdbc:postgresql://postgres-postgresql:5432/metastore" + driver: "org.postgresql.Driver" + username: hive + passwordSecretRef: + name: hive-db-secret + key: password + driverJarUrl: "https://repo1.maven.org/maven2/org/postgresql/postgresql/42.7.5/postgresql-42.7.5.jar" + + zookeeper: + quorum: "zookeeper:2181" + + storage: + coreSiteOverrides: +
fs.defaultFS: "s3a://hive-k8s-bucket" + envVars: + - name: HADOOP_OPTIONAL_TOOLS + value: "hadoop-aws" + - name: AWS_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + name: aws-s3-creds + key: accessKey + - name: AWS_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + name: aws-s3-creds + key: secretKey +``` + +```bash +helm install hive ./helm/hive-operator -f values.yaml +``` + +--- + +### Google Cloud Storage (GCS) + +Create the secret with your GCS service account key: + +```bash +kubectl create secret generic gcs-creds --from-file=key.json=/path/to/service-account-key.json +``` + +**CLI:** + +```bash +helm install hive ./helm/hive-operator \ + --set cluster.database.type=postgres \ + --set cluster.database.url="jdbc:postgresql://postgres-postgresql:5432/metastore" \ + --set cluster.database.driver="org.postgresql.Driver" \ + --set cluster.database.username=hive \ + --set cluster.database.passwordSecretRef.name=hive-db-secret \ + --set cluster.database.passwordSecretRef.key=password \ + --set cluster.database.driverJarUrl="https://repo1.maven.org/maven2/org/postgresql/postgresql/42.7.5/postgresql-42.7.5.jar" \ + --set cluster.zookeeper.quorum="zookeeper:2181" \ + --set 'cluster.storage.coreSiteOverrides.fs\.defaultFS=gs://hive-bucket' \ + --set 'cluster.storage.coreSiteOverrides.fs\.gs\.impl=com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem' \ + --set 'cluster.storage.coreSiteOverrides.fs\.gs\.auth\.type=SERVICE_ACCOUNT_JSON_KEYFILE' \ + --set 'cluster.storage.coreSiteOverrides.fs\.gs\.auth\.service\.account\.json\.keyfile=/etc/gcs/key.json' \ + --set-string 'cluster.storage.coreSiteOverrides.fs\.gs\.reported\.permissions=777' \ + --set 'cluster.storage.externalJars[0]=https://repo1.maven.org/maven2/com/google/cloud/bigdataoss/gcs-connector/hadoop3-2.2.25/gcs-connector-hadoop3-2.2.25-shaded.jar' \ + --set 'cluster.storage.volumes[0].name=gcs-key' \ + --set 'cluster.storage.volumes[0].secret.secretName=gcs-creds' \ + --set 'cluster.storage.volumeMounts[0].name=gcs-key' \ + --set
'cluster.storage.volumeMounts[0].mountPath=/etc/gcs' \ + --set 'cluster.storage.volumeMounts[0].readOnly=true' +``` + +**Values file:** + +```yaml +# values.yaml +cluster: + database: + type: postgres + url: "jdbc:postgresql://postgres-postgresql:5432/metastore" + driver: "org.postgresql.Driver" + username: hive + passwordSecretRef: + name: hive-db-secret + key: password + driverJarUrl: "https://repo1.maven.org/maven2/org/postgresql/postgresql/42.7.5/postgresql-42.7.5.jar" + + zookeeper: + quorum: "zookeeper:2181" + + storage: + coreSiteOverrides: + fs.defaultFS: "gs://hive-bucket" + fs.gs.impl: "com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem" + fs.gs.auth.type: "SERVICE_ACCOUNT_JSON_KEYFILE" + fs.gs.auth.service.account.json.keyfile: "/etc/gcs/key.json" + fs.gs.reported.permissions: "777" + externalJars: + - "https://repo1.maven.org/maven2/com/google/cloud/bigdataoss/gcs-connector/hadoop3-2.2.25/gcs-connector-hadoop3-2.2.25-shaded.jar" + volumes: + - name: gcs-key + secret: + secretName: gcs-creds + volumeMounts: + - name: gcs-key + mountPath: /etc/gcs + readOnly: true +``` + +```bash +helm install hive ./helm/hive-operator -f values.yaml +``` + +--- + +## Deployment Modes + +### Minimal Cluster (no LLAP/TezAM) + +**CLI:** + +```bash +helm install hive ./helm/hive-operator \ + --set cluster.database.type=postgres \ + --set cluster.database.url="jdbc:postgresql://postgres-postgresql:5432/metastore" \ + --set cluster.database.driver="org.postgresql.Driver" \ + --set cluster.database.username=hive \ + --set cluster.database.passwordSecretRef.name=hive-db-secret \ + --set cluster.database.passwordSecretRef.key=password \ + --set cluster.database.driverJarUrl="https://repo1.maven.org/maven2/org/postgresql/postgresql/42.7.5/postgresql-42.7.5.jar" \ + --set cluster.zookeeper.quorum="zookeeper:2181" \ + --set cluster.storage.coreSiteOverrides."fs\.defaultFS"="s3a://hive" \ + --set cluster.storage.coreSiteOverrides."fs\.s3a\.endpoint"="http://ozone-s3g-rest:9878" \ 
+ --set-string cluster.storage.coreSiteOverrides."fs\.s3a\.path\.style\.access"=true \ + --set 'cluster.storage.envVars[0].name=HADOOP_OPTIONAL_TOOLS' \ + --set 'cluster.storage.envVars[0].value=hadoop-aws' \ + --set 'cluster.storage.envVars[1].name=AWS_ACCESS_KEY_ID' \ + --set 'cluster.storage.envVars[1].value=ozone' \ + --set 'cluster.storage.envVars[2].name=AWS_SECRET_ACCESS_KEY' \ + --set 'cluster.storage.envVars[2].value=ozone' \ + --set cluster.metastore.replicas=1 \ + --set cluster.hiveServer2.replicas=1 \ + --set cluster.llap.enabled=false \ + --set cluster.tezAm.enabled=false +``` + +**Values file:** + +```yaml +# values.yaml +cluster: + database: + type: postgres + url: "jdbc:postgresql://postgres-postgresql:5432/metastore" + driver: "org.postgresql.Driver" + username: hive + passwordSecretRef: + name: hive-db-secret + key: password + driverJarUrl: "https://repo1.maven.org/maven2/org/postgresql/postgresql/42.7.5/postgresql-42.7.5.jar" + + zookeeper: + quorum: "zookeeper:2181" + + storage: + coreSiteOverrides: + fs.defaultFS: "s3a://hive" + fs.s3a.endpoint: "http://ozone-s3g-rest:9878" + fs.s3a.path.style.access: "true" + envVars: + - name: HADOOP_OPTIONAL_TOOLS + value: "hadoop-aws" + - name: AWS_ACCESS_KEY_ID + value: "ozone" + - name: AWS_SECRET_ACCESS_KEY + value: "ozone" + + metastore: + replicas: 1 + hiveServer2: + replicas: 1 + llap: + enabled: false + tezAm: + enabled: false +``` + +```bash +helm install hive ./helm/hive-operator -f values.yaml +``` + +--- + +### External Metastore (skip Metastore deployment) + +**CLI:** + +```bash +helm install hive ./helm/hive-operator \ + --set cluster.zookeeper.quorum="zookeeper:2181" \ + --set cluster.metastore.enabled=false \ + --set cluster.metastore.externalUri="thrift://my-external-metastore:9083" \ + --set cluster.storage.coreSiteOverrides."fs\.defaultFS"="s3a://hive" \ + --set 'cluster.storage.envVars[0].name=HADOOP_OPTIONAL_TOOLS' \ + --set 'cluster.storage.envVars[0].value=hadoop-aws' \ + --set 
'cluster.storage.envVars[1].name=AWS_ACCESS_KEY_ID' \ + --set 'cluster.storage.envVars[1].value=ozone' \ + --set 'cluster.storage.envVars[2].name=AWS_SECRET_ACCESS_KEY' \ + --set 'cluster.storage.envVars[2].value=ozone' +``` + +**Values file:** + +```yaml +# values.yaml +cluster: + database: {} # Not needed when metastore is external + + zookeeper: + quorum: "zookeeper:2181" + + metastore: + enabled: false + externalUri: "thrift://my-external-metastore:9083" + + storage: + coreSiteOverrides: + fs.defaultFS: "s3a://hive" + envVars: + - name: HADOOP_OPTIONAL_TOOLS + value: "hadoop-aws" + - name: AWS_ACCESS_KEY_ID + value: "ozone" + - name: AWS_SECRET_ACCESS_KEY + value: "ozone" +``` + +```bash +helm install hive ./helm/hive-operator -f values.yaml +``` + +--- + +### Custom Replicas and Resources + +**Values file:** + +```yaml +# values.yaml +cluster: + # ... database, zookeeper, storage as above ... + + metastore: + replicas: 3 + resources: + requestsMemory: "1Gi" + limitsMemory: "2Gi" + + hiveServer2: + replicas: 4 + serviceType: LoadBalancer + resources: + requestsCpu: "1" + requestsMemory: "2Gi" + limitsMemory: "4Gi" + + llap: + enabled: true + replicas: 3 + executors: 2 + memoryMb: 4096 + resources: + requestsMemory: "4Gi" + limitsMemory: "6Gi" + + tezAm: + replicas: 3 + scratchStorageSize: "5Gi" +``` + +```bash +helm install hive ./helm/hive-operator -f values.yaml +``` + +--- + +## Verify + +```bash +kubectl get pods -w +kubectl get hiveclusters +kubectl describe hivecluster hive +``` + +## Connect to HiveServer2 + +```bash +kubectl exec -it deployment/hive-hiveserver2 -- beeline -u "jdbc:hive2://hive-hiveserver2:10000/" +``` + +Or via port-forward: + +```bash +kubectl port-forward svc/hive-hiveserver2 10000:10000 +beeline -u "jdbc:hive2://localhost:10000/" +``` + +--- + +## Helm Values Reference + +### Operator + +| Value | Default | Description | +|-------|---------|-------------| +| `operator.image.repository` | `apache/hive` | Operator image repository | +| 
`operator.image.tag` | `operator-4.3.0-SNAPSHOT` | Operator image tag | +| `operator.image.pullPolicy` | `IfNotPresent` | Image pull policy | +| `operator.resources` | `{requests: {cpu: 200m, memory: 256Mi}, limits: {memory: 512Mi}}` | Operator pod resources | + +### Cluster (HiveCluster CR) + +| Value | Default | Description | +|-------|---------|-------------| +| `cluster.enabled` | `true` | Create a HiveCluster CR (set `false` to install only the operator) | +| `cluster.name` | `hive` | HiveCluster resource name | +| `cluster.image` | `apache/hive:4.3.0-SNAPSHOT` | Hive component image | +| `cluster.imagePullPolicy` | `IfNotPresent` | Image pull policy: `Always`, `Never`, or `IfNotPresent` | + +### Database (Required) + +| Value | Default | Description | +|-------|---------|-------------| +| `cluster.database.type` | `postgres` | DB type: `postgres`, `mysql`, `derby` | +| `cluster.database.url` | | JDBC URL | +| `cluster.database.driver` | | JDBC driver class | +| `cluster.database.username` | | DB username | +| `cluster.database.passwordSecretRef.name` | | K8s Secret name | +| `cluster.database.passwordSecretRef.key` | | Key in the Secret (e.g. `password`) | +| `cluster.database.driverJarUrl` | | URL to download JDBC driver | + +### ZooKeeper (Required) + +| Value | Default | Description | +|-------|---------|-------------| +| `cluster.zookeeper.quorum` | | ZooKeeper connection string (e.g. `zookeeper:2181`) | + +### Storage (Required) + +| Value | Default | Description | +|-------|---------|-------------| +| `cluster.storage.coreSiteOverrides` | `{}` | `core-site.xml` properties (`fs.defaultFS`, `fs.s3a.*`, etc.) 
| +| `cluster.storage.envVars` | `[]` | Env vars for all pods (credentials, `HADOOP_OPTIONAL_TOOLS`) | +| `cluster.storage.externalJars` | `[]` | Connector JAR URLs downloaded at startup | +| `cluster.storage.volumes` | `[]` | Volumes for all pods (credential files) | +| `cluster.storage.volumeMounts` | `[]` | Volume mounts for all containers | + +### Metastore + +| Value | Default | Description | +|-------|---------|-------------| +| `cluster.metastore.enabled` | `true` | Deploy a managed Metastore | +| `cluster.metastore.externalUri` | | Thrift URI when `enabled: false` | +| `cluster.metastore.replicas` | `2` | Replica count | +| `cluster.metastore.warehouseDir` | `/hive/warehouse` | Warehouse directory | +| `cluster.metastore.resources` | `{}` | CPU/memory | +| `cluster.metastore.configOverrides` | `{}` | Extra `metastore-site.xml` properties | +| `cluster.metastore.extraVolumes` | `[]` | Additional volumes for Metastore pods | +| `cluster.metastore.extraVolumeMounts` | `[]` | Additional volume mounts for Metastore containers | + +### HiveServer2 + +| Value | Default | Description | +|-------|---------|-------------| +| `cluster.hiveServer2.replicas` | `2` | Replica count | +| `cluster.hiveServer2.serviceType` | `ClusterIP` | K8s Service type | +| `cluster.hiveServer2.thriftPort` | `10000` | Thrift port | +| `cluster.hiveServer2.webUiPort` | `10002` | Web UI port | +| `cluster.hiveServer2.resources` | `{}` | CPU/memory | +| `cluster.hiveServer2.configOverrides` | `{}` | Extra `hive-site.xml` properties | +| `cluster.hiveServer2.externalJars` | `[]` | HS2-specific JARs | +| `cluster.hiveServer2.extraVolumes` | `[]` | Additional volumes for HS2 pods | +| `cluster.hiveServer2.extraVolumeMounts` | `[]` | Additional volume mounts for HS2 containers | + +### LLAP + +| Value | Default | Description | +|-------|---------|-------------| +| `cluster.llap.enabled` | `true` | Enable LLAP daemons | +| `cluster.llap.replicas` | `2` | Replica count | +| 
`cluster.llap.executors` | `1` | Executors per daemon | +| `cluster.llap.memoryMb` | `1024` | Memory per daemon (MB) | +| `cluster.llap.serviceHosts` | `@llap0` | LLAP ZK identity | +| `cluster.llap.resources` | `{}` | CPU/memory | +| `cluster.llap.configOverrides` | `{}` | Extra LLAP config properties | +| `cluster.llap.extraVolumes` | `[]` | Additional volumes for LLAP pods | +| `cluster.llap.extraVolumeMounts` | `[]` | Additional volume mounts for LLAP containers | + +### Tez AM + +| Value | Default | Description | +|-------|---------|-------------| +| `cluster.tezAm.enabled` | `true` | Enable Tez Application Master | +| `cluster.tezAm.replicas` | `2` | Replica count | +| `cluster.tezAm.scratchStorageSize` | `1Gi` | Shared scratch PVC size | +| `cluster.tezAm.scratchStorageClassName` | | StorageClass (must support RWX) | +| `cluster.tezAm.resources` | `{}` | CPU/memory | +| `cluster.tezAm.configOverrides` | `{}` | Extra TezAM config properties | +| `cluster.tezAm.extraVolumes` | `[]` | Additional volumes for TezAM pods | +| `cluster.tezAm.extraVolumeMounts` | `[]` | Additional volume mounts for TezAM containers | + +--- + +## Upgrade and Uninstall + +### Upgrade (values only, no CRD changes) + +```bash +helm upgrade hive ./helm/hive-operator -f my-values.yaml +``` + +### Upgrade (with CRD schema changes) + +Helm does **not** update CRDs on `helm upgrade`. If the operator version +includes CRD changes (new status fields, new spec fields), you must +re-apply the CRD manually: + +```bash +kubectl apply -f helm/hive-operator/crds/hiveclusters.hive.apache.org-v1.yml +helm upgrade hive ./helm/hive-operator -f my-values.yaml +``` + +### Full Uninstall and Reinstall (clean slate) + +```bash +# Uninstall (removes operator + HiveCluster CR + all managed pods) +helm uninstall hive + +# IMPORTANT: Always delete the CRD before reinstalling to ensure +# the updated schema is applied. Helm only creates CRDs on install, +# it never updates existing ones. 
+kubectl delete crd hiveclusters.hive.apache.org + +# Reinstall +helm install hive ./helm/hive-operator -f my-values.yaml +``` + +### Remove Everything (including dependencies) + +```bash +helm uninstall hive +kubectl delete crd hiveclusters.hive.apache.org +helm uninstall ozone postgres zookeeper --ignore-not-found +kubectl delete pvc data-zookeeper-0 --ignore-not-found +kubectl delete pvc data-postgres-postgresql-0 --ignore-not-found +kubectl delete secret hive-db-secret --ignore-not-found +``` + +--- + +## Advanced: Deploy via Operator Only (without Helm) + +If you prefer raw manifests over Helm, you can deploy the operator and create +HiveCluster CRs manually. This example uses Ozone as the storage backend. + +### 1. Install the CRD + +```bash +kubectl apply -f helm/hive-operator/crds/hiveclusters.hive.apache.org-v1.yml +``` + +### 2. Deploy RBAC and the Operator + +```bash +kubectl create namespace hive-operator +kubectl apply -f config/rbac/ +export HIVE_VERSION=4.3.0-SNAPSHOT +envsubst < config/operator/deployment.yaml | kubectl apply -f - +``` + +### 3. Deploy Ozone + +```bash +helm repo add ozone https://apache.github.io/ozone-helm-charts/ +helm install ozone ozone/ozone --version 0.2.0 --wait +sleep 50 +kubectl exec statefulset/ozone-om -- ozone sh volume create /s3v +kubectl exec statefulset/ozone-om -- ozone sh bucket create /s3v/hive +``` + +### 4. Create a HiveCluster CR + +Full-HA (Metastore x2, HS2 x2, LLAP x2, TezAM x2): + +```bash +envsubst < config/samples/hivecluster-full-ha.yaml | kubectl apply -f - +``` + +Or minimal (Metastore x1, HS2 x1, no LLAP/TezAM): + +```bash +envsubst < config/samples/hivecluster-minimal.yaml | kubectl apply -f - +``` + +### 5. 
Cleanup + +```bash +kubectl delete hivecluster hive +envsubst < config/operator/deployment.yaml | kubectl delete -f - +kubectl delete -f config/rbac/ +kubectl delete namespace hive-operator +# Always delete CRD to ensure a clean reinstall picks up schema changes +kubectl delete crd hiveclusters.hive.apache.org +kubectl delete pvc data-zookeeper-0 --ignore-not-found +kubectl delete pvc data-postgres-postgresql-0 --ignore-not-found +kubectl delete secret hive-db-secret --ignore-not-found +helm uninstall ozone postgres zookeeper --ignore-not-found +``` + +--- + +## Architecture + +``` +HiveCluster CR + | + v +HiveClusterReconciler + | + +-- HadoopConfigMapDependent (core-site.xml) + +-- MetastoreConfigMapDependent (metastore-site.xml) + +-- HiveServer2ConfigMapDependent (hive-site.xml + tez-site.xml) + +-- SchemaInitJobDependent (schematool -initOrUpgradeSchema) + +-- MetastoreDeploymentDependent --> MetastoreServiceDependent + +-- HiveServer2DeploymentDependent --> HiveServer2ServiceDependent + +-- LlapStatefulSetDependent --> LlapServiceDependent (optional) + +-- ScratchPvcDependent (shared scratch PVC, optional) + +-- TezAmStatefulSetDependent --> TezAmServiceDependent (optional) +``` + +**Startup order:** +1. ConfigMaps (Hadoop, Metastore, HiveServer2) +2. Schema Init Job [if Metastore enabled] +3. Metastore Deployment + Service [if enabled] +4. HiveServer2 Deployment + Service +5. LLAP + TezAM [if enabled] diff --git a/packaging/src/kubernetes/config/operator/deployment.yaml b/packaging/src/kubernetes/config/operator/deployment.yaml new file mode 100644 index 000000000000..b7d9625daacf --- /dev/null +++ b/packaging/src/kubernetes/config/operator/deployment.yaml @@ -0,0 +1,48 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to you under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +apiVersion: apps/v1 +kind: Deployment +metadata: + name: hive-operator + namespace: hive-operator + labels: + app.kubernetes.io/name: hive-kubernetes-operator + app.kubernetes.io/managed-by: kubectl +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: hive-kubernetes-operator + template: + metadata: + labels: + app.kubernetes.io/name: hive-kubernetes-operator + spec: + serviceAccountName: hive-operator + containers: + - name: hive-operator + image: apache/hive:operator-${HIVE_VERSION} + imagePullPolicy: IfNotPresent + env: + - name: OPERATOR_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + resources: + requests: + cpu: 200m + memory: 256Mi + limits: + memory: 512Mi diff --git a/packaging/src/kubernetes/config/rbac/cluster-role-binding.yaml b/packaging/src/kubernetes/config/rbac/cluster-role-binding.yaml new file mode 100644 index 000000000000..26940ba6b37d --- /dev/null +++ b/packaging/src/kubernetes/config/rbac/cluster-role-binding.yaml @@ -0,0 +1,26 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to you under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: hive-operator-rolebinding +subjects: + - kind: ServiceAccount + name: hive-operator + namespace: hive-operator +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: hive-operator-role diff --git a/packaging/src/kubernetes/config/rbac/cluster-role.yaml b/packaging/src/kubernetes/config/rbac/cluster-role.yaml new file mode 100644 index 000000000000..22cd4f268b60 --- /dev/null +++ b/packaging/src/kubernetes/config/rbac/cluster-role.yaml @@ -0,0 +1,47 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to you under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: hive-operator-role +rules: + # HiveCluster CRD management + - apiGroups: ["hive.apache.org"] + resources: ["hiveclusters", "hiveclusters/status", "hiveclusters/finalizers"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] + # Deployments and StatefulSets + - apiGroups: ["apps"] + resources: ["deployments", "statefulsets"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] + # Jobs for schema initialization + - apiGroups: ["batch"] + resources: ["jobs"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] + # Services, ConfigMaps, and PersistentVolumeClaims + - apiGroups: [""] + resources: ["services", "configmaps", "persistentvolumeclaims"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] + # Secrets: read-only for DB password references + - apiGroups: [""] + resources: ["secrets"] + verbs: ["get", "list", "watch"] + # Events for status reporting + - apiGroups: [""] + resources: ["events"] + verbs: ["create", "patch"] + # Pods: read-only for readiness checking + - apiGroups: [""] + resources: ["pods"] + verbs: ["get", "list", "watch"] diff --git a/packaging/src/kubernetes/config/rbac/service-account.yaml b/packaging/src/kubernetes/config/rbac/service-account.yaml new file mode 100644 index 000000000000..bc48726076b9 --- /dev/null +++ b/packaging/src/kubernetes/config/rbac/service-account.yaml @@ -0,0 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to you under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +apiVersion: v1 +kind: ServiceAccount +metadata: + name: hive-operator + namespace: hive-operator diff --git a/packaging/src/kubernetes/config/samples/hivecluster-full-ha.yaml b/packaging/src/kubernetes/config/samples/hivecluster-full-ha.yaml new file mode 100644 index 000000000000..b15d700ff066 --- /dev/null +++ b/packaging/src/kubernetes/config/samples/hivecluster-full-ha.yaml @@ -0,0 +1,78 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to you under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Full HiveCluster (HA): All four services with LLAP, TezAM, and multiple replicas +apiVersion: hive.apache.org/v1alpha1 +kind: HiveCluster +metadata: + name: hive +spec: + image: apache/hive:${HIVE_VERSION} + imagePullPolicy: IfNotPresent + + metastore: + replicas: 2 + database: + type: postgres + url: "jdbc:postgresql://postgres-postgresql:5432/metastore" + driver: "org.postgresql.Driver" + username: hive + passwordSecretRef: + name: hive-db-secret + key: password + driverJarUrl: "https://repo1.maven.org/maven2/org/postgresql/postgresql/42.7.5/postgresql-42.7.5.jar" + warehouseDir: "/hive/warehouse" + + hiveServer2: + replicas: 2 + serviceType: ClusterIP + resources: + requestsMemory: "1Gi" + limitsMemory: "2Gi" + configOverrides: + hive.server2.enable.doAs: "false" + + llap: + enabled: true + replicas: 2 + executors: 1 + memoryMb: 1024 + serviceHosts: "@llap0" + resources: + requestsMemory: "2Gi" + limitsMemory: "3Gi" + + tezAm: + enabled: true + replicas: 2 + + zookeeper: + quorum: "zookeeper:2181" + + # Hadoop filesystem configuration — any storage backend (S3A, ABFS, GCS, HDFS, Ozone) + hadoop: + coreSiteOverrides: + fs.defaultFS: "s3a://hive" + fs.s3a.endpoint: "http://ozone-s3g-rest:9878" + fs.s3a.path.style.access: "true" + + # Environment variables injected into all component pods (credentials, JVM options, etc.) + envVars: + - name: HADOOP_OPTIONAL_TOOLS + value: "hadoop-aws" + - name: AWS_ACCESS_KEY_ID + value: "ozone" + - name: AWS_SECRET_ACCESS_KEY + value: "ozone" diff --git a/packaging/src/kubernetes/config/samples/hivecluster-minimal.yaml b/packaging/src/kubernetes/config/samples/hivecluster-minimal.yaml new file mode 100644 index 000000000000..e42b10396dd5 --- /dev/null +++ b/packaging/src/kubernetes/config/samples/hivecluster-minimal.yaml @@ -0,0 +1,59 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to you under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Minimal HiveCluster: Metastore + HiveServer2 with external S3-compatible storage +apiVersion: hive.apache.org/v1alpha1 +kind: HiveCluster +metadata: + name: hive +spec: + image: apache/hive:${HIVE_VERSION} + imagePullPolicy: IfNotPresent + + metastore: + replicas: 1 + database: + type: postgres + url: "jdbc:postgresql://postgres-postgresql:5432/metastore" + driver: "org.postgresql.Driver" + username: hive + passwordSecretRef: + name: hive-db-secret + key: password + driverJarUrl: "https://repo1.maven.org/maven2/org/postgresql/postgresql/42.7.5/postgresql-42.7.5.jar" + warehouseDir: "/hive/warehouse" + + hiveServer2: + replicas: 1 + serviceType: ClusterIP + + zookeeper: + quorum: "zookeeper:2181" + + # Hadoop filesystem configuration — any storage backend (S3A, ABFS, GCS, HDFS, Ozone) + hadoop: + coreSiteOverrides: + fs.defaultFS: "s3a://hive" + fs.s3a.endpoint: "http://ozone-s3g-rest:9878" + fs.s3a.path.style.access: "true" + + # Environment variables injected into all component pods (credentials, JVM options, etc.) 
+ envVars: + - name: HADOOP_OPTIONAL_TOOLS + value: "hadoop-aws" + - name: AWS_ACCESS_KEY_ID + value: "ozone" + - name: AWS_SECRET_ACCESS_KEY + value: "ozone" diff --git a/packaging/src/kubernetes/helm/hive-operator/Chart.yaml b/packaging/src/kubernetes/helm/hive-operator/Chart.yaml new file mode 100644 index 000000000000..b1e8104b155e --- /dev/null +++ b/packaging/src/kubernetes/helm/hive-operator/Chart.yaml @@ -0,0 +1,34 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to you under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +apiVersion: v2 +name: hive-operator +description: Apache Hive Kubernetes Operator - deploys and manages Hive clusters on Kubernetes +type: application +version: "4.3.0-SNAPSHOT" +appVersion: "4.3.0-SNAPSHOT" +kubeVersion: ">=1.25.0" +keywords: + - hive + - hadoop + - sql + - data-warehouse + - kubernetes-operator +home: https://hive.apache.org/ +sources: + - https://github.com/apache/hive +maintainers: + - name: Apache Hive + url: https://hive.apache.org/ diff --git a/packaging/src/kubernetes/helm/hive-operator/crds/hiveclusters.hive.apache.org-v1.yml b/packaging/src/kubernetes/helm/hive-operator/crds/hiveclusters.hive.apache.org-v1.yml new file mode 100644 index 000000000000..2feee3c587a3 --- /dev/null +++ b/packaging/src/kubernetes/helm/hive-operator/crds/hiveclusters.hive.apache.org-v1.yml @@ -0,0 +1,528 @@ +# Generated by Fabric8 CRDGenerator, manual edits might get overwritten! +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: hiveclusters.hive.apache.org +spec: + group: hive.apache.org + names: + kind: HiveCluster + plural: hiveclusters + shortNames: + - hc + singular: hivecluster + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + properties: + spec: + properties: + envVars: + description: "Environment variables injected into all component pods\ + \ (e.g., storage credentials, custom JVM options)" + items: + type: object + type: array + x-kubernetes-preserve-unknown-fields: true + externalJars: + description: "External JARs (URLs) downloaded into all component pods\ + \ and added to HADOOP_CLASSPATH (e.g., GCS connector, ABFS connector)" + items: + type: string + type: array + hadoop: + description: Hadoop/core-site.xml configuration overrides + properties: + coreSiteOverrides: + additionalProperties: + type: string + description: Key-value pairs written into core-site.xml + type: object + type: object + hiveServer2: + description: HiveServer2 component configuration + properties: + 
configOverrides: + additionalProperties: + type: string + description: Additional configuration overrides as key-value pairs + type: object + externalJars: + description: 'List of URIs to external JARs to download and add + to HS2 classpath ' + items: + type: string + type: array + extraVolumeMounts: + description: Additional volume mounts for the container + items: + type: object + type: array + x-kubernetes-preserve-unknown-fields: true + extraVolumes: + description: "Additional volumes to attach to the pod (e.g., for\ + \ keytabs or truststores)" + items: + type: object + type: array + x-kubernetes-preserve-unknown-fields: true + livenessProbe: + description: Liveness probe configuration + properties: + failureThreshold: + description: Minimum consecutive failures for the probe to + be considered failed after having succeeded. + type: integer + initialDelaySeconds: + description: Number of seconds after the container has started + before probes are initiated. + type: integer + periodSeconds: + description: How often (in seconds) to perform the probe. + type: integer + successThreshold: + description: Minimum consecutive successes for the probe to + be considered successful after having failed. + type: integer + timeoutSeconds: + description: Number of seconds after which the probe times + out. + type: integer + type: object + readinessProbe: + description: Readiness probe configuration + properties: + failureThreshold: + description: Minimum consecutive failures for the probe to + be considered failed after having succeeded. + type: integer + initialDelaySeconds: + description: Number of seconds after the container has started + before probes are initiated. + type: integer + periodSeconds: + description: How often (in seconds) to perform the probe. + type: integer + successThreshold: + description: Minimum consecutive successes for the probe to + be considered successful after having failed. 
+ type: integer + timeoutSeconds: + description: Number of seconds after which the probe times + out. + type: integer + type: object + replicas: + default: 1 + description: Number of replicas + type: integer + resources: + description: Resource requirements for pods + properties: + limitsCpu: + description: "CPU limit (e.g. 2, 1000m)" + type: string + limitsMemory: + description: "Memory limit (e.g. 2Gi, 1024Mi)" + type: string + requestsCpu: + default: 500m + description: "CPU request (e.g. 500m, 1)" + type: string + requestsMemory: + default: 1Gi + description: "Memory request (e.g. 1Gi, 512Mi)" + type: string + type: object + serviceType: + default: ClusterIP + description: "Kubernetes Service type: ClusterIP, LoadBalancer,\ + \ or NodePort" + type: string + thriftPort: + default: 10000 + description: HiveServer2 Thrift port + type: integer + webUiPort: + default: 10002 + description: HiveServer2 Web UI port + type: integer + type: object + x-kubernetes-preserve-unknown-fields: true + image: + description: Docker image to use for all Hive components + type: string + imagePullPolicy: + description: "Image pull policy: Always, Never, or IfNotPresent" + type: string + llap: + description: LLAP daemon configuration. Enabled by default. 
+ properties: + configOverrides: + additionalProperties: + type: string + description: Additional configuration overrides as key-value pairs + type: object + enabled: + default: true + description: Whether LLAP is enabled + type: boolean + executors: + default: 1 + description: Number of LLAP executors per daemon + type: integer + extraVolumeMounts: + description: Additional volume mounts for the container + items: + type: object + type: array + x-kubernetes-preserve-unknown-fields: true + extraVolumes: + description: "Additional volumes to attach to the pod (e.g., for\ + \ keytabs or truststores)" + items: + type: object + type: array + x-kubernetes-preserve-unknown-fields: true + memoryMb: + default: 1024 + description: Memory in MB per LLAP daemon instance + type: integer + readinessProbe: + description: Readiness probe configuration + properties: + failureThreshold: + description: Minimum consecutive failures for the probe to + be considered failed after having succeeded. + type: integer + initialDelaySeconds: + description: Number of seconds after the container has started + before probes are initiated. + type: integer + periodSeconds: + description: How often (in seconds) to perform the probe. + type: integer + successThreshold: + description: Minimum consecutive successes for the probe to + be considered successful after having failed. + type: integer + timeoutSeconds: + description: Number of seconds after which the probe times + out. + type: integer + type: object + replicas: + default: 1 + description: Number of replicas + type: integer + resources: + description: Resource requirements for pods + properties: + limitsCpu: + description: "CPU limit (e.g. 2, 1000m)" + type: string + limitsMemory: + description: "Memory limit (e.g. 2Gi, 1024Mi)" + type: string + requestsCpu: + default: 500m + description: "CPU request (e.g. 500m, 1)" + type: string + requestsMemory: + default: 1Gi + description: "Memory request (e.g. 
1Gi, 512Mi)" + type: string + type: object + serviceHosts: + description: LLAP service hosts identifier for ZooKeeper registration + type: string + type: object + x-kubernetes-preserve-unknown-fields: true + metastore: + description: Metastore component configuration + properties: + configOverrides: + additionalProperties: + type: string + description: Additional configuration overrides as key-value pairs + type: object + database: + description: Database connection configuration for the metastore + backend + properties: + driver: + description: JDBC driver class name + type: string + driverJarUrl: + description: "URL to download the JDBC driver jar, e.g. https://repo1.maven.org/maven2/org/postgresql/postgresql/42.7.5/postgresql-42.7.5.jar" + type: string + passwordSecretRef: + description: Reference to a Kubernetes Secret containing the + database password + properties: + key: + description: Key within the Secret + type: string + name: + description: Name of the Kubernetes Secret + type: string + type: object + type: + default: derby + description: "Database type: derby, mysql, postgres, mssql,\ + \ or oracle" + type: string + url: + description: JDBC connection URL + type: string + username: + description: Database username + type: string + type: object + enabled: + default: true + description: Whether the operator should deploy and manage a Metastore + type: boolean + externalUri: + description: Thrift URI of the external Metastore (if enabled + is false) + type: string + extraVolumeMounts: + description: Additional volume mounts for the container + items: + type: object + type: array + x-kubernetes-preserve-unknown-fields: true + extraVolumes: + description: "Additional volumes to attach to the pod (e.g., for\ + \ keytabs or truststores)" + items: + type: object + type: array + x-kubernetes-preserve-unknown-fields: true + livenessProbe: + description: Liveness probe configuration + properties: + failureThreshold: + description: Minimum consecutive failures for 
the probe to + be considered failed after having succeeded. + type: integer + initialDelaySeconds: + description: Number of seconds after the container has started + before probes are initiated. + type: integer + periodSeconds: + description: How often (in seconds) to perform the probe. + type: integer + successThreshold: + description: Minimum consecutive successes for the probe to + be considered successful after having failed. + type: integer + timeoutSeconds: + description: Number of seconds after which the probe times + out. + type: integer + type: object + readinessProbe: + description: Readiness probe configuration + properties: + failureThreshold: + description: Minimum consecutive failures for the probe to + be considered failed after having succeeded. + type: integer + initialDelaySeconds: + description: Number of seconds after the container has started + before probes are initiated. + type: integer + periodSeconds: + description: How often (in seconds) to perform the probe. + type: integer + successThreshold: + description: Minimum consecutive successes for the probe to + be considered successful after having failed. + type: integer + timeoutSeconds: + description: Number of seconds after which the probe times + out. + type: integer + type: object + replicas: + default: 1 + description: Number of replicas + type: integer + resources: + description: Resource requirements for pods + properties: + limitsCpu: + description: "CPU limit (e.g. 2, 1000m)" + type: string + limitsMemory: + description: "Memory limit (e.g. 2Gi, 1024Mi)" + type: string + requestsCpu: + default: 500m + description: "CPU request (e.g. 500m, 1)" + type: string + requestsMemory: + default: 1Gi + description: "Memory request (e.g. 
1Gi, 512Mi)" + type: string + type: object + warehouseDir: + default: /hive/warehouse + description: Warehouse directory path + type: string + type: object + x-kubernetes-preserve-unknown-fields: true + tezAm: + description: Tez Application Master configuration. Enabled by default. + properties: + configOverrides: + additionalProperties: + type: string + description: Additional configuration overrides as key-value pairs + type: object + enabled: + default: true + description: Whether Tez AM is enabled + type: boolean + extraVolumeMounts: + description: Additional volume mounts for the container + items: + type: object + type: array + x-kubernetes-preserve-unknown-fields: true + extraVolumes: + description: "Additional volumes to attach to the pod (e.g., for\ + \ keytabs or truststores)" + items: + type: object + type: array + x-kubernetes-preserve-unknown-fields: true + replicas: + default: 1 + description: Number of replicas + type: integer + resources: + description: Resource requirements for pods + properties: + limitsCpu: + description: "CPU limit (e.g. 2, 1000m)" + type: string + limitsMemory: + description: "Memory limit (e.g. 2Gi, 1024Mi)" + type: string + requestsCpu: + default: 500m + description: "CPU request (e.g. 500m, 1)" + type: string + requestsMemory: + default: 1Gi + description: "Memory request (e.g. 1Gi, 512Mi)" + type: string + type: object + scratchStorageClassName: + description: "StorageClass for the shared scratch PVC. Must support\ + \ ReadWriteMany access. If null, uses cluster default." 
+ type: string + scratchStorageSize: + default: 1Gi + description: Storage size for the shared scratch PVC (ReadWriteMany) + mounted on HS2 and TezAM at /opt/hive/scratch + type: string + type: object + x-kubernetes-preserve-unknown-fields: true + volumeMounts: + description: "Volume mounts added to all component containers (e.g.,\ + \ mounting a GCS key file at /etc/gcs/key.json)" + items: + type: object + type: array + x-kubernetes-preserve-unknown-fields: true + volumes: + description: "Volumes added to all component pods (e.g., Secrets containing\ + \ keytabs or service account keys)" + items: + type: object + type: array + x-kubernetes-preserve-unknown-fields: true + zookeeper: + description: External ZooKeeper connection details (not managed by + this operator) + properties: + quorum: + description: ZooKeeper quorum connection string. This field is + strictly required. + type: string + required: + - quorum + type: object + required: + - zookeeper + type: object + x-kubernetes-preserve-unknown-fields: true + status: + properties: + conditions: + items: + properties: + lastTransitionTime: + type: string + message: + type: string + observedGeneration: + type: integer + reason: + type: string + status: + type: string + type: + type: string + type: object + type: array + hiveServer2: + properties: + desiredReplicas: + type: integer + phase: + type: string + readyReplicas: + type: integer + type: object + llap: + properties: + desiredReplicas: + type: integer + phase: + type: string + readyReplicas: + type: integer + type: object + metastore: + properties: + desiredReplicas: + type: integer + phase: + type: string + readyReplicas: + type: integer + type: object + observedGeneration: + type: integer + tezAm: + properties: + desiredReplicas: + type: integer + phase: + type: string + readyReplicas: + type: integer + type: object + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git 
a/packaging/src/kubernetes/helm/hive-operator/templates/_helpers.tpl b/packaging/src/kubernetes/helm/hive-operator/templates/_helpers.tpl new file mode 100644 index 000000000000..ab398ff9fea1 --- /dev/null +++ b/packaging/src/kubernetes/helm/hive-operator/templates/_helpers.tpl @@ -0,0 +1,52 @@ +{{/* +Licensed to the Apache Software Foundation (ASF) under one or more +contributor license agreements. See the NOTICE file distributed with +this work for additional information regarding copyright ownership. +The ASF licenses this file to you under the Apache License, Version 2.0 +(the "License"); you may not use this file except in compliance with +the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/}} + +{{/* Chart name: .Values.nameOverride if set, otherwise the chart name; capped at 63 characters with a trailing "-" removed */}} +{{- define "hive-operator.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* Fullname: release-name + chart-name (or nameOverride), capped at 63 characters with a trailing "-" removed */}} +{{- define "hive-operator.fullname" -}} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* Operator labels. helm.sh/chart is sanitized ("+" replaced by "_", capped at 63 characters) because a chart version with SemVer build metadata is not a valid label value as-is */}} +{{- define "hive-operator.labels" -}} +app.kubernetes.io/name: hive-kubernetes-operator +app.kubernetes.io/instance: {{ .Release.Name }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +helm.sh/chart: {{ printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* Operator selector labels: the name/instance pair from the operator labels */}} +{{- define "hive-operator.selectorLabels" -}} +app.kubernetes.io/name: hive-kubernetes-operator +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* ServiceAccount name: same value as hive-operator.fullname */}} +{{- define "hive-operator.serviceAccountName" -}} +{{- printf "%s" (include "hive-operator.fullname" .) }} +{{- end }} + +{{/* HiveCluster CR name: .Values.cluster.name if set, otherwise the release name */}} +{{- define "hive-operator.clusterName" -}} +{{- .Values.cluster.name | default .Release.Name }} +{{- end }} diff --git a/packaging/src/kubernetes/helm/hive-operator/templates/clusterrole.yaml b/packaging/src/kubernetes/helm/hive-operator/templates/clusterrole.yaml new file mode 100644 index 000000000000..d27e1fea8c6f --- /dev/null +++ b/packaging/src/kubernetes/helm/hive-operator/templates/clusterrole.yaml @@ -0,0 +1,52 @@ +{{/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License.
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ include "hive-operator.fullname" . }} + labels: + {{- include "hive-operator.labels" . | nindent 4 }} +rules: + # HiveCluster CRD management + - apiGroups: ["hive.apache.org"] + resources: ["hiveclusters", "hiveclusters/status", "hiveclusters/finalizers"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] + # Deployments and StatefulSets + - apiGroups: ["apps"] + resources: ["deployments", "statefulsets"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] + # Jobs for schema initialization + - apiGroups: ["batch"] + resources: ["jobs"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] + # Services, ConfigMaps, and PersistentVolumeClaims + - apiGroups: [""] + resources: ["services", "configmaps", "persistentvolumeclaims"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] + # Secrets: read-only for DB password references + - apiGroups: [""] + resources: ["secrets"] + verbs: ["get", "list", "watch"] + # Events for status reporting + - apiGroups: [""] + resources: ["events"] + verbs: ["create", "patch"] + # Pods: read-only for readiness checking + - apiGroups: [""] + resources: ["pods"] + verbs: ["get", "list", "watch"] diff --git a/packaging/src/kubernetes/helm/hive-operator/templates/clusterrolebinding.yaml b/packaging/src/kubernetes/helm/hive-operator/templates/clusterrolebinding.yaml new file mode 100644 index 000000000000..10ef23316ab5 --- /dev/null +++ 
b/packaging/src/kubernetes/helm/hive-operator/templates/clusterrolebinding.yaml @@ -0,0 +1,31 @@ +{{/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ include "hive-operator.fullname" . }} + labels: + {{- include "hive-operator.labels" . | nindent 4 }} +subjects: + - kind: ServiceAccount + name: {{ include "hive-operator.serviceAccountName" . }} + namespace: {{ .Release.Namespace }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ include "hive-operator.fullname" . }} diff --git a/packaging/src/kubernetes/helm/hive-operator/templates/deployment.yaml b/packaging/src/kubernetes/helm/hive-operator/templates/deployment.yaml new file mode 100644 index 000000000000..1c57badfeec0 --- /dev/null +++ b/packaging/src/kubernetes/helm/hive-operator/templates/deployment.yaml @@ -0,0 +1,46 @@ +{{/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. 
The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/}} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "hive-operator.fullname" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "hive-operator.labels" . | nindent 4 }} +spec: + replicas: 1 + selector: + matchLabels: + {{- include "hive-operator.selectorLabels" . | nindent 6 }} + template: + metadata: + labels: + {{- include "hive-operator.selectorLabels" . | nindent 8 }} + spec: + serviceAccountName: {{ include "hive-operator.serviceAccountName" . }} + containers: + - name: operator + image: "{{ .Values.operator.image.repository }}:{{ .Values.operator.image.tag }}" + imagePullPolicy: {{ .Values.operator.image.pullPolicy }} + env: + - name: OPERATOR_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + resources: + {{- toYaml .Values.operator.resources | nindent 12 }} diff --git a/packaging/src/kubernetes/helm/hive-operator/templates/hivecluster.yaml b/packaging/src/kubernetes/helm/hive-operator/templates/hivecluster.yaml new file mode 100644 index 000000000000..504d6f20dc29 --- /dev/null +++ b/packaging/src/kubernetes/helm/hive-operator/templates/hivecluster.yaml @@ -0,0 +1,181 @@ +{{/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. 
The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/}} +{{- if .Values.cluster.enabled }} +apiVersion: hive.apache.org/v1alpha1 +kind: HiveCluster +metadata: + name: {{ include "hive-operator.clusterName" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "hive-operator.labels" . | nindent 4 }} +spec: + image: {{ .Values.cluster.image }} + imagePullPolicy: {{ .Values.cluster.imagePullPolicy }} + + metastore: + enabled: {{ .Values.cluster.metastore.enabled }} + {{- if .Values.cluster.metastore.enabled }} + replicas: {{ .Values.cluster.metastore.replicas }} + warehouseDir: {{ .Values.cluster.metastore.warehouseDir | quote }} + database: + type: {{ .Values.cluster.database.type | quote }} + {{- if .Values.cluster.database.url }} + url: {{ .Values.cluster.database.url | quote }} + {{- end }} + {{- if .Values.cluster.database.driver }} + driver: {{ .Values.cluster.database.driver | quote }} + {{- end }} + {{- if .Values.cluster.database.username }} + username: {{ .Values.cluster.database.username | quote }} + {{- end }} + {{- if and .Values.cluster.database.passwordSecretRef .Values.cluster.database.passwordSecretRef.name }} + passwordSecretRef: + name: {{ .Values.cluster.database.passwordSecretRef.name | quote }} + key: {{ .Values.cluster.database.passwordSecretRef.key | default "password" | quote }} + {{- end }} + {{- if .Values.cluster.database.driverJarUrl }} + driverJarUrl: {{ .Values.cluster.database.driverJarUrl | quote }} + {{- end }} + {{- 
if .Values.cluster.metastore.resources }} + resources: + {{- toYaml .Values.cluster.metastore.resources | nindent 6 }} + {{- end }} + {{- if .Values.cluster.metastore.configOverrides }} + configOverrides: + {{- toYaml .Values.cluster.metastore.configOverrides | nindent 6 }} + {{- end }} + {{- if .Values.cluster.metastore.extraVolumes }} + extraVolumes: + {{- toYaml .Values.cluster.metastore.extraVolumes | nindent 6 }} + {{- end }} + {{- if .Values.cluster.metastore.extraVolumeMounts }} + extraVolumeMounts: + {{- toYaml .Values.cluster.metastore.extraVolumeMounts | nindent 6 }} + {{- end }} + {{- else }} + {{- if .Values.cluster.metastore.externalUri }} + externalUri: {{ .Values.cluster.metastore.externalUri | quote }} + {{- end }} + {{- end }} + + hiveServer2: + replicas: {{ .Values.cluster.hiveServer2.replicas }} + serviceType: {{ .Values.cluster.hiveServer2.serviceType | quote }} + thriftPort: {{ .Values.cluster.hiveServer2.thriftPort }} + webUiPort: {{ .Values.cluster.hiveServer2.webUiPort }} + {{- if .Values.cluster.hiveServer2.resources }} + resources: + {{- toYaml .Values.cluster.hiveServer2.resources | nindent 6 }} + {{- end }} + {{- if .Values.cluster.hiveServer2.configOverrides }} + configOverrides: + {{- toYaml .Values.cluster.hiveServer2.configOverrides | nindent 6 }} + {{- end }} + {{- if .Values.cluster.hiveServer2.externalJars }} + externalJars: + {{- toYaml .Values.cluster.hiveServer2.externalJars | nindent 6 }} + {{- end }} + {{- if .Values.cluster.hiveServer2.extraVolumes }} + extraVolumes: + {{- toYaml .Values.cluster.hiveServer2.extraVolumes | nindent 6 }} + {{- end }} + {{- if .Values.cluster.hiveServer2.extraVolumeMounts }} + extraVolumeMounts: + {{- toYaml .Values.cluster.hiveServer2.extraVolumeMounts | nindent 6 }} + {{- end }} + + llap: + enabled: {{ .Values.cluster.llap.enabled }} + {{- if .Values.cluster.llap.enabled }} + replicas: {{ .Values.cluster.llap.replicas }} + executors: {{ .Values.cluster.llap.executors }} + memoryMb: {{ 
.Values.cluster.llap.memoryMb }} + serviceHosts: {{ .Values.cluster.llap.serviceHosts | quote }} + {{- if .Values.cluster.llap.resources }} + resources: + {{- toYaml .Values.cluster.llap.resources | nindent 6 }} + {{- end }} + {{- if .Values.cluster.llap.configOverrides }} + configOverrides: + {{- toYaml .Values.cluster.llap.configOverrides | nindent 6 }} + {{- end }} + {{- if .Values.cluster.llap.extraVolumes }} + extraVolumes: + {{- toYaml .Values.cluster.llap.extraVolumes | nindent 6 }} + {{- end }} + {{- if .Values.cluster.llap.extraVolumeMounts }} + extraVolumeMounts: + {{- toYaml .Values.cluster.llap.extraVolumeMounts | nindent 6 }} + {{- end }} + {{- end }} + + tezAm: + enabled: {{ .Values.cluster.tezAm.enabled }} + {{- if .Values.cluster.tezAm.enabled }} + replicas: {{ .Values.cluster.tezAm.replicas }} + scratchStorageSize: {{ .Values.cluster.tezAm.scratchStorageSize | quote }} + {{- if .Values.cluster.tezAm.scratchStorageClassName }} + scratchStorageClassName: {{ .Values.cluster.tezAm.scratchStorageClassName | quote }} + {{- end }} + {{- if .Values.cluster.tezAm.resources }} + resources: + {{- toYaml .Values.cluster.tezAm.resources | nindent 6 }} + {{- end }} + {{- if .Values.cluster.tezAm.configOverrides }} + configOverrides: + {{- toYaml .Values.cluster.tezAm.configOverrides | nindent 6 }} + {{- end }} + {{- if .Values.cluster.tezAm.extraVolumes }} + extraVolumes: + {{- toYaml .Values.cluster.tezAm.extraVolumes | nindent 6 }} + {{- end }} + {{- if .Values.cluster.tezAm.extraVolumeMounts }} + extraVolumeMounts: + {{- toYaml .Values.cluster.tezAm.extraVolumeMounts | nindent 6 }} + {{- end }} + {{- end }} + + zookeeper: + quorum: {{ .Values.cluster.zookeeper.quorum | quote }} + + {{- if .Values.cluster.storage.coreSiteOverrides }} + hadoop: + coreSiteOverrides: + {{- toYaml .Values.cluster.storage.coreSiteOverrides | nindent 6 }} + {{- end }} + + {{- if .Values.cluster.storage.envVars }} + envVars: + {{- toYaml .Values.cluster.storage.envVars | nindent 4 }} 
+ {{- end }} + + {{- if .Values.cluster.storage.externalJars }} + externalJars: + {{- toYaml .Values.cluster.storage.externalJars | nindent 4 }} + {{- end }} + + {{- if .Values.cluster.storage.volumes }} + volumes: + {{- toYaml .Values.cluster.storage.volumes | nindent 4 }} + {{- end }} + + {{- if .Values.cluster.storage.volumeMounts }} + volumeMounts: + {{- toYaml .Values.cluster.storage.volumeMounts | nindent 4 }} + {{- end }} +{{- end }} diff --git a/packaging/src/kubernetes/helm/hive-operator/templates/serviceaccount.yaml b/packaging/src/kubernetes/helm/hive-operator/templates/serviceaccount.yaml new file mode 100644 index 000000000000..39085bf90d3b --- /dev/null +++ b/packaging/src/kubernetes/helm/hive-operator/templates/serviceaccount.yaml @@ -0,0 +1,24 @@ +{{/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "hive-operator.serviceAccountName" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "hive-operator.labels" . 
| nindent 4 }} diff --git a/packaging/src/kubernetes/helm/hive-operator/values.yaml b/packaging/src/kubernetes/helm/hive-operator/values.yaml new file mode 100644 index 000000000000..093e104cf746 --- /dev/null +++ b/packaging/src/kubernetes/helm/hive-operator/values.yaml @@ -0,0 +1,158 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to you under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# ============================================================================= +# Hive Kubernetes Operator Helm Values +# +# QUICK START: Only set values under "cluster.database", "cluster.zookeeper", +# and "cluster.storage" — the chart defaults to a Full-HA cluster with +# Metastore, HiveServer2, LLAP, and Tez AM all enabled. +# ============================================================================= + +# -- Operator Deployment Configuration ---------------------------------------- + +operator: + image: + repository: apache/hive + tag: "operator-4.3.0-SNAPSHOT" + pullPolicy: IfNotPresent + resources: + requests: + cpu: 200m + memory: 256Mi + limits: + memory: 512Mi + +# -- HiveCluster Instance Configuration --------------------------------------- + +# Set to false to install only the operator (no HiveCluster CR created). +# Useful if you want to manage HiveCluster CRs yourself. 
+cluster: + enabled: true + name: hive + + # Hive component image (not the operator image) + image: "apache/hive:4.3.0-SNAPSHOT" + imagePullPolicy: IfNotPresent + + # --------------------------------------------------------------------------- + # DATABASE (Required) — RDBMS for the Hive Metastore backend + # --------------------------------------------------------------------------- + database: + type: postgres + url: "" # e.g. "jdbc:postgresql://postgres:5432/metastore" + driver: "" # e.g. "org.postgresql.Driver" + username: "" # e.g. "hive" + passwordSecretRef: + name: "" # e.g. "hive-db-secret" + key: "" # e.g. "password" + driverJarUrl: "" # e.g. "https://repo1.maven.org/maven2/org/postgresql/postgresql/42.7.5/postgresql-42.7.5.jar" + + # --------------------------------------------------------------------------- + # ZOOKEEPER (Required) — for Tez session registry and LLAP + # --------------------------------------------------------------------------- + zookeeper: + quorum: "" # e.g. "zookeeper:2181" + + # --------------------------------------------------------------------------- + # STORAGE (Required) — Hadoop filesystem configuration + # --------------------------------------------------------------------------- + storage: + # core-site.xml properties (filesystem endpoint, auth settings, etc.) 
+ coreSiteOverrides: {} + # fs.defaultFS: "s3a://hive" + # fs.s3a.endpoint: "http://ozone-s3g-rest:9878" + # fs.s3a.path.style.access: "true" + + # Environment variables injected into all pods (credentials, HADOOP_OPTIONAL_TOOLS) + envVars: [] + # - name: HADOOP_OPTIONAL_TOOLS + # value: "hadoop-aws" + # - name: AWS_ACCESS_KEY_ID + # value: "ozone" + # - name: AWS_SECRET_ACCESS_KEY + # value: "ozone" + + # URLs of connector JARs downloaded at pod startup (GCS, ABFS connectors) + externalJars: [] + # - "https://repo1.maven.org/maven2/com/google/cloud/bigdataoss/gcs-connector/hadoop3-2.2.25/gcs-connector-hadoop3-2.2.25-shaded.jar" + + # Volumes added to all pods (for mounting credential files) + volumes: [] + # - name: gcs-key + # secret: + # secretName: gcs-creds + + # Volume mounts added to all containers + volumeMounts: [] + # - name: gcs-key + # mountPath: /etc/gcs + # readOnly: true + + # --------------------------------------------------------------------------- + # METASTORE — defaults to enabled, 2 replicas (HA) + # --------------------------------------------------------------------------- + metastore: + enabled: true + replicas: 2 + warehouseDir: "/hive/warehouse" + resources: {} + configOverrides: {} + extraVolumes: [] + extraVolumeMounts: [] + # Set to use an external Metastore instead of deploying one: + # enabled: false + # externalUri: "thrift://external-metastore:9083" + + # --------------------------------------------------------------------------- + # HIVESERVER2 — defaults to 2 replicas (HA) + # --------------------------------------------------------------------------- + hiveServer2: + replicas: 2 + serviceType: ClusterIP + thriftPort: 10000 + webUiPort: 10002 + resources: {} + configOverrides: {} + externalJars: [] + extraVolumes: [] + extraVolumeMounts: [] + + # --------------------------------------------------------------------------- + # LLAP — enabled by default for full-HA + # 
--------------------------------------------------------------------------- + llap: + enabled: true + replicas: 2 + executors: 1 + memoryMb: 1024 + serviceHosts: "@llap0" + resources: {} + configOverrides: {} + extraVolumes: [] + extraVolumeMounts: [] + + # --------------------------------------------------------------------------- + # TEZ AM — enabled by default for full-HA + # --------------------------------------------------------------------------- + tezAm: + enabled: true + replicas: 2 + scratchStorageSize: "1Gi" + scratchStorageClassName: "" + resources: {} + configOverrides: {} + extraVolumes: [] + extraVolumeMounts: [] diff --git a/packaging/src/kubernetes/pom.xml b/packaging/src/kubernetes/pom.xml new file mode 100644 index 000000000000..342441730cc5 --- /dev/null +++ b/packaging/src/kubernetes/pom.xml @@ -0,0 +1,204 @@ + + + + 4.0.0 + + org.apache.hive + hive + 4.3.0-SNAPSHOT + ../../../pom.xml + + hive-kubernetes-operator + jar + Hive Kubernetes Operator + Kubernetes operator for managing Apache Hive clusters + + ../../.. 
+ + + + io.javaoperatorsdk + operator-framework + ${josdk.version} + + + io.javaoperatorsdk + operator-framework-core + ${josdk.version} + + + io.fabric8 + kubernetes-client + ${fabric8.version} + + + io.fabric8 + kubernetes-httpclient-vertx + ${fabric8.version} + + + io.github.java-diff-utils + java-diff-utils + 4.17 + + + io.vertx + vertx-web-common + 4.5.27 + + + + io.fabric8 + crd-generator-apt + ${fabric8.version} + provided + + + org.apache.logging.log4j + log4j-slf4j-impl + ${log4j2.version} + + + org.apache.logging.log4j + log4j-core + ${log4j2.version} + + + + src/java + + + src/resources + + + + + org.apache.maven.plugins + maven-compiler-plugin + + full + + + io.fabric8 + crd-generator-apt + ${fabric8.version} + + + + + + org.apache.maven.plugins + maven-shade-plugin + ${maven.shade.plugin.version} + + + package + + shade + + + false + true + shaded + + + org.apache.hive.kubernetes.operator.HiveOperatorMain + + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + + + + + org.apache.rat + apache-rat-plugin + + + helm/hive-operator/crds/** + + + + + + org.apache.maven.plugins + maven-antrun-plugin + + + copy-crd-to-helm-crds + compile + + run + + + + + + + + + + + + + + + + + + + + + kubernetes + + + + org.codehaus.mojo + exec-maven-plugin + + + docker-build + package + + exec + + + docker + + build + -t + apache/hive:operator-${project.version} + . + + + + + + + + + + diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/HiveOperatorMain.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/HiveOperatorMain.java new file mode 100644 index 000000000000..55bd3372a40d --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/HiveOperatorMain.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator; + +import io.javaoperatorsdk.operator.Operator; +import org.apache.hive.kubernetes.operator.reconciler.HiveClusterReconciler; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** Entry point for the Hive Kubernetes Operator process. */ +public final class HiveOperatorMain { + + private static final Logger LOG = + LoggerFactory.getLogger(HiveOperatorMain.class); + + private HiveOperatorMain() { + } + + /** Registers the HiveCluster reconciler and then starts the operator.
*/ + public static void main(String[] args) { + LOG.info("Starting Hive Kubernetes Operator"); + Operator operator = new Operator(); + operator.register(new HiveClusterReconciler()); + operator.start(); + LOG.info("Hive Kubernetes Operator started successfully"); + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HadoopConfigMapDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HadoopConfigMapDependent.java new file mode 100644 index 000000000000..6c0f9308dbc1 --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HadoopConfigMapDependent.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hive.kubernetes.operator.dependent; + +import java.util.Map; + +import io.fabric8.kubernetes.api.model.ConfigMap; +import io.fabric8.kubernetes.api.model.ConfigMapBuilder; +import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.api.config.informer.Informer; +import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; +import org.apache.hive.kubernetes.operator.model.HiveCluster; +import org.apache.hive.kubernetes.operator.util.HadoopXmlBuilder; +import org.apache.hive.kubernetes.operator.util.HiveConfigBuilder; +import org.apache.hive.kubernetes.operator.util.Labels; + +/** Manages the Hadoop core-site.xml ConfigMap for filesystem configuration. */ +@KubernetesDependent( + informer = @Informer(labelSelector = "app.kubernetes.io/component=hadoop-config," + + "app.kubernetes.io/managed-by=hive-kubernetes-operator") +) +public class HadoopConfigMapDependent + extends HiveDependentResource { + + public static final String COMPONENT = "hadoop-config"; + + public HadoopConfigMapDependent() { + super(ConfigMap.class); + } + + @Override + protected ConfigMap desired(HiveCluster hiveCluster, + Context context) { + Map props = + HiveConfigBuilder.getHadoopCoreSite(hiveCluster.getSpec()); + + return new ConfigMapBuilder() + .withNewMetadata() + .withName(resourceName(hiveCluster)) + .withNamespace(hiveCluster.getMetadata().getNamespace()) + .withLabels(Labels.forComponent(hiveCluster, COMPONENT)) + .endMetadata() + .addToData("core-site.xml", HadoopXmlBuilder.buildXml(props)) + .build(); + } + + /** Returns the ConfigMap resource name for this HiveCluster. 
*/ + public static String resourceName(HiveCluster hiveCluster) { + return hiveCluster.getMetadata().getName() + "-hadoop-config"; + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveDependentResource.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveDependentResource.java new file mode 100644 index 000000000000..88ad8032c2c1 --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveDependentResource.java @@ -0,0 +1,426 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hive.kubernetes.operator.dependent; + +import java.nio.charset.StandardCharsets; +import java.security.MessageDigest; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import io.fabric8.kubernetes.api.model.Affinity; +import io.fabric8.kubernetes.api.model.AffinityBuilder; +import io.fabric8.kubernetes.api.model.Container; +import io.fabric8.kubernetes.api.model.ContainerBuilder; +import io.fabric8.kubernetes.api.model.EnvVar; +import io.fabric8.kubernetes.api.model.EnvVarBuilder; +import io.fabric8.kubernetes.api.model.HasMetadata; +import io.fabric8.kubernetes.api.model.Quantity; +import io.fabric8.kubernetes.api.model.ResourceRequirements; +import io.fabric8.kubernetes.api.model.Probe; +import io.fabric8.kubernetes.api.model.ProbeBuilder; +import io.fabric8.kubernetes.api.model.IntOrString; +import io.fabric8.kubernetes.api.model.ResourceRequirementsBuilder; +import io.fabric8.kubernetes.api.model.Volume; +import io.fabric8.kubernetes.api.model.VolumeBuilder; +import io.fabric8.kubernetes.api.model.VolumeMount; +import io.fabric8.kubernetes.api.model.VolumeMountBuilder; +import io.fabric8.kubernetes.client.KubernetesClientException; +import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.processing.dependent.Matcher; +import io.javaoperatorsdk.operator.processing.dependent.kubernetes.CRUDKubernetesDependentResource; +import org.apache.hive.kubernetes.operator.model.HiveCluster; +import org.apache.hive.kubernetes.operator.model.spec.DatabaseConfig; +import org.apache.hive.kubernetes.operator.model.spec.ResourceRequirementsSpec; + +import org.apache.hive.kubernetes.operator.model.spec.SecretKeyRef; +import org.apache.hive.kubernetes.operator.model.spec.ProbeSpec; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Base class for all Hive operator dependent resources. + *

+ * Overrides {@link #getSecondaryResource} to use this dependent's own + * event source instead of the generic type-based lookup. This is + * required because JOSDK's default implementation calls + * {@code context.getSecondaryResource(type)} which throws when + * multiple dependents manage the same Kubernetes resource type + * (e.g. multiple ConfigMap or Service dependents). + */ +public abstract class HiveDependentResource + extends CRUDKubernetesDependentResource { + + private static final Logger LOG = + LoggerFactory.getLogger(HiveDependentResource.class); + + protected static final String CONF_MOUNT_PATH = "/etc/hive/conf"; + protected static final String HIVE_CONF_DIR = "/opt/hive/conf"; + protected static final String EXT_JARS_PATH = "/tmp/ext-jars"; + + protected HiveDependentResource(Class resourceType) { + super(resourceType); + } + + /** + * Catches 409 AlreadyExists during resource creation caused by + * informer lag — the resource exists on the API server but + * the informer cache hasn't indexed it yet, so JOSDK calls + * create directly. + */ + @Override + protected R handleCreate(R desired, P primary, Context

context) { + try { + return super.handleCreate(desired, primary, context); + } catch (KubernetesClientException e) { + if (e.getCode() == 409) { + LOG.info("Resource {} already exists (informer lag), " + + "will reconcile on next event", + desired.getMetadata().getName()); + return desired; + } + throw e; + } + } + + @Override + public Optional getSecondaryResource(P primary, + Context

context) { + return eventSource() + .flatMap(es -> es.getSecondaryResource(primary)); + } + + /** + * Jobs and PVCs are immutable after creation — Kubernetes rejects + * any PUT that modifies spec.selector, spec.template (Job) or + * spec.resources/accessModes (PVC). Short-circuit the match to + * prevent the framework from attempting updates on these resources. + */ + @Override + public Matcher.Result match(R actualResource, R desired, + P primary, Context

context) { + if (actualResource != null) { + String kind = actualResource.getKind(); + if ("Job".equals(kind) + || "PersistentVolumeClaim".equals(kind)) { + return Matcher.Result.nonComputed(true); + } + } + return super.match(actualResource, desired, primary, context); + } + + /** + * Computes a SHA-256 hash of the given input strings. + * Used to annotate pod templates so that config changes trigger rolling updates. + */ + protected static String sha256(String... inputs) { + try { + MessageDigest digest = MessageDigest.getInstance("SHA-256"); + for (String input : inputs) { + if (input != null) { + digest.update(input.getBytes(StandardCharsets.UTF_8)); + } + } + byte[] hash = digest.digest(); + StringBuilder sb = new StringBuilder(64); + for (byte b : hash) { + sb.append(String.format("%02x", b)); + } + return sb.toString(); + } catch (Exception e) { + return "unknown"; + } + } + + /** + * Builds the database connection env vars: DB_DRIVER, DBPASSWORD + * (from SecretKeyRef), and SERVICE_OPTS with javax.jdo connection + * properties. Shared by MetastoreDeploymentDependent and + * SchemaInitJobDependent. + */ + protected static List buildDbEnvVars(DatabaseConfig db) { + List envVars = new ArrayList<>(); + envVars.add(new EnvVar("DB_DRIVER", db.type(), null)); + + // DBPASSWORD must be defined before SERVICE_OPTS so that + // Kubernetes $(DBPASSWORD) interpolation resolves correctly. 
+ SecretKeyRef passwordRef = db.passwordSecretRef(); + if (passwordRef != null) { + envVars.add(new EnvVarBuilder() + .withName("DBPASSWORD") + .withNewValueFrom() + .withNewSecretKeyRef() + .withName(passwordRef.name()) + .withKey(passwordRef.key()) + .endSecretKeyRef() + .endValueFrom() + .build()); + } + + StringBuilder serviceOpts = new StringBuilder(); + if (db.url() != null) { + serviceOpts.append("-Djavax.jdo.option.ConnectionURL=") + .append(db.url()); + } + if (db.driver() != null) { + serviceOpts.append(" -Djavax.jdo.option.ConnectionDriverName=") + .append(db.driver()); + } + if (db.username() != null) { + serviceOpts.append(" -Djavax.jdo.option.ConnectionUserName=") + .append(db.username()); + } + if (passwordRef != null) { + serviceOpts.append( + " -Djavax.jdo.option.ConnectionPassword=$(DBPASSWORD)"); + } + if (!serviceOpts.isEmpty()) { + envVars.add(new EnvVar("SERVICE_OPTS", + serviceOpts.toString().trim(), null)); + } + return envVars; + } + + + /** Builds a projected Volume merging multiple ConfigMaps. */ + protected static Volume buildProjectedConfigVolume( + String volumeName, String... configMapNames) { + List + projections = new ArrayList<>(); + for (String cmName : configMapNames) { + projections.add( + new io.fabric8.kubernetes.api.model.VolumeProjectionBuilder() + .withNewConfigMap().withName(cmName).endConfigMap() + .build()); + } + return new VolumeBuilder() + .withName(volumeName) + .withNewProjected() + .withSources(projections) + .endProjected() + .build(); + } + + + /** + * Populates volume mounts and volumes for the Metastore pod spec + * (shared by MetastoreDeploymentDependent and SchemaInitJobDependent). + * Adds the projected hive-config volume (merging metastore + hadoop + * ConfigMaps). External JARs (JDBC driver + global externalJars) + * should be handled separately via {@link #addExternalJars}. 
+ */ + protected static void buildMetastoreVolumes( + HiveCluster hiveCluster, + List volumeMounts, + List volumes) { + + volumeMounts.add(new VolumeMountBuilder() + .withName("hive-config") + .withMountPath(CONF_MOUNT_PATH).build()); + + volumes.add(buildProjectedConfigVolume("hive-config", + MetastoreConfigMapDependent.resourceName(hiveCluster), + HadoopConfigMapDependent.resourceName(hiveCluster))); + } + + /** Builds Kubernetes ResourceRequirements from the operator's spec. */ + protected static ResourceRequirements buildResources(ResourceRequirementsSpec spec) { + if (spec == null) { + return new ResourceRequirements(); + } + ResourceRequirementsBuilder builder = new ResourceRequirementsBuilder(); + if (spec.requestsCpu() != null) { + builder.addToRequests("cpu", new Quantity(spec.requestsCpu())); + } + if (spec.requestsMemory() != null) { + builder.addToRequests("memory", new Quantity(spec.requestsMemory())); + } + if (spec.limitsCpu() != null) { + builder.addToLimits("cpu", new Quantity(spec.limitsCpu())); + } + if (spec.limitsMemory() != null) { + builder.addToLimits("memory", new Quantity(spec.limitsMemory())); + } + return builder.build(); + } + + /** + * Sets a preferred pod anti-affinity on the pod spec if no affinity is + * already defined. This spreads replicas across nodes while allowing + * future user-defined affinity to take precedence. 
+ */ + protected static void applySpreadAffinityIfAbsent( + io.fabric8.kubernetes.api.model.PodSpec podSpec, + Map selectorLabels) { + if (podSpec.getAffinity() != null) { + return; + } + podSpec.setAffinity(new AffinityBuilder() + .withNewPodAntiAffinity() + .addNewPreferredDuringSchedulingIgnoredDuringExecution() + .withWeight(100) + .withNewPodAffinityTerm() + .withNewLabelSelector() + .withMatchLabels(selectorLabels) + .endLabelSelector() + .withTopologyKey("kubernetes.io/hostname") + .endPodAffinityTerm() + .endPreferredDuringSchedulingIgnoredDuringExecution() + .endPodAntiAffinity() + .build()); + } + + /** + * Builds an init container that downloads external JARs via wget + * (for http/https URLs) or hadoop fs (for HDFS/cloud paths). + * Each URL is passed to the shell as a single-quoted word with embedded + * single quotes escaped, so a quote inside a URL cannot end the argument early. + * + * @param image container image used for the init container + * @param externalJars JAR locations to download, processed in order + * @param envVars environment variables set on the init container + * @param volumeMounts mounts for the init container; must be non-empty, the + * first entry's mount path is the download target directory + * @param containerName name given to the init container + * @return the init container running the generated download script + */ + protected static Container buildExternalJarsInitContainer( + String image, List externalJars, + List envVars, List volumeMounts, + String containerName) { + + // Determine target directory from the first volume mount + String targetDir = volumeMounts.get(0).getMountPath(); + + StringBuilder cmd = new StringBuilder(); + cmd.append("export HADOOP_CONF_DIR=").append(CONF_MOUNT_PATH).append(" && "); + + for (String jarUrl : externalJars) { + // Shell-quote the URL: close the quote, emit an escaped quote, reopen (' becomes '\''). + String quotedUrl = "'" + jarUrl.replace("'", "'\\''") + "'"; + if (jarUrl.startsWith("http://") || jarUrl.startsWith("https://")) { + cmd.append("wget -q --tries=3 --waitretry=5 -P ").append(targetDir) + .append(" ").append(quotedUrl).append(" && "); + } else { + // Up to three attempts with a 5 second pause; the group fails if none succeeded. + cmd.append("{ ok=0; for i in 1 2 3; do hadoop fs -copyToLocal ").append(quotedUrl) + .append(" ").append(targetDir).append("/ && ok=1 && break || sleep 5; done; ") + .append("[ $ok -eq 1 ]; } && "); + } + } + cmd.append("echo 'All external JARs downloaded successfully.'"); + + return new ContainerBuilder() + .withName(containerName) + .withImage(image) + .withCommand("/bin/bash", "-c", cmd.toString()) + .withEnv(envVars) + .withVolumeMounts(volumeMounts) + .build(); + } + + /** + * Replaces the directory-level CONF_MOUNT_PATH volume mount with + * individual subPath mounts into HIVE_CONF_DIR
(/opt/hive/conf/). + *

+ * This avoids the broken-symlink problem: Kubernetes projected volumes + * use internal timestamped directories that rotate on ConfigMap updates. + * The Hive Docker entrypoint symlinks resolved paths (not the stable + * {@code ..data/} link), so symlinks break when the directory rotates. + * subPath mounts place files directly without symlink indirection. + *

+ * Call this AFTER {@code addGlobalExternalJars} so the init container + * can still find the CONF_MOUNT_PATH mount. + */ + protected static void replaceConfMountWithSubPaths( + List volumeMounts, String volumeName, + String... fileNames) { + volumeMounts.removeIf( + vm -> vm.getMountPath().equals(CONF_MOUNT_PATH)); + for (String file : fileNames) { + volumeMounts.add(new VolumeMountBuilder() + .withName(volumeName) + .withMountPath(HIVE_CONF_DIR + "/" + file) + .withSubPath(file) + .build()); + } + } + + + /** + * Adds external JAR download init container, volume, and + * volume mount. Downloads to /tmp/ext-jars so the native + * Hive entrypoint.sh automatically copies them to $HIVE_HOME/lib. + */ + protected static void addExternalJars( + String image, + List jars, + List initContainers, + List volumeMounts, + List volumes, + List envVars) { + if (jars == null || jars.isEmpty()) { + return; + } + + VolumeMount extMount = new VolumeMountBuilder() + .withName("ext-jars") + .withMountPath(EXT_JARS_PATH).build(); + + // Add volume mount for the main container + volumeMounts.add(extMount); + + // Add emptyDir volume + volumes.add(new VolumeBuilder() + .withName("ext-jars") + .withNewEmptyDir().endEmptyDir().build()); + + // Build init container with config mount + ext-jars mount + List initMounts = new ArrayList<>(); + initMounts.add(extMount); + for (VolumeMount vm : volumeMounts) { + if (vm.getMountPath().equals(CONF_MOUNT_PATH)) { + initMounts.add(vm); + break; + } + } + + initContainers.add( + buildExternalJarsInitContainer(image, jars, + envVars, initMounts, "download-ext-jars")); + } + + /** + * Builds a TCP socket probe using user-provided overrides or fallback defaults. + */ + protected static Probe buildTcpProbe(int port, ProbeSpec spec, int defaultInitialDelay, int defaultPeriod, + int defaultFailureThreshold) { + + int initialDelay = + (spec != null && spec.initialDelaySeconds() != null) ? 
spec.initialDelaySeconds() : defaultInitialDelay; + int period = (spec != null && spec.periodSeconds() != null) ? spec.periodSeconds() : defaultPeriod; + int failureThreshold = + (spec != null && spec.failureThreshold() != null) ? spec.failureThreshold() : defaultFailureThreshold; + + ProbeBuilder builder = new ProbeBuilder() + .withNewTcpSocket() + .withPort(new IntOrString(port)) + .endTcpSocket() + .withInitialDelaySeconds(initialDelay) + .withPeriodSeconds(period) + .withFailureThreshold(failureThreshold); + + if (spec != null && spec.timeoutSeconds() != null) { + builder.withTimeoutSeconds(spec.timeoutSeconds()); + } + if (spec != null && spec.successThreshold() != null) { + builder.withSuccessThreshold(spec.successThreshold()); + } + return builder.build(); + } + +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2ConfigMapDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2ConfigMapDependent.java new file mode 100644 index 000000000000..9bb0597cc960 --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2ConfigMapDependent.java @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.dependent; + +import java.util.Map; + +import io.fabric8.kubernetes.api.model.ConfigMap; +import io.fabric8.kubernetes.api.model.ConfigMapBuilder; +import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.api.config.informer.Informer; +import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; +import org.apache.hive.kubernetes.operator.model.HiveCluster; +import org.apache.hive.kubernetes.operator.model.HiveClusterSpec; +import org.apache.hive.kubernetes.operator.util.HadoopXmlBuilder; +import org.apache.hive.kubernetes.operator.util.HiveConfigBuilder; +import org.apache.hive.kubernetes.operator.util.Labels; + +/** Manages the hive-site.xml ConfigMap for HiveServer2. */ +@KubernetesDependent( + informer = @Informer(labelSelector = "app.kubernetes.io/component=hiveserver2," + + "app.kubernetes.io/managed-by=hive-kubernetes-operator") +) +public class HiveServer2ConfigMapDependent + extends HiveDependentResource { + + public static final String COMPONENT = "hiveserver2"; + + public HiveServer2ConfigMapDependent() { + super(ConfigMap.class); + } + + @Override + protected ConfigMap desired(HiveCluster hiveCluster, + Context context) { + HiveClusterSpec spec = hiveCluster.getSpec(); + + Map props = + HiveConfigBuilder.getHiveServer2HiveSite(hiveCluster, spec); + Map tezProps = HiveConfigBuilder.getTezSite(spec); + + return new ConfigMapBuilder() + .withNewMetadata() + .withName(resourceName(hiveCluster)) + .withNamespace(hiveCluster.getMetadata().getNamespace()) + .withLabels(Labels.forComponent(hiveCluster, COMPONENT)) + .endMetadata() + .addToData("hive-site.xml", HadoopXmlBuilder.buildXml(props)) + .addToData("tez-site.xml", HadoopXmlBuilder.buildXml(tezProps)) + .build(); + } + + /** Returns the ConfigMap resource name for this HiveCluster. 
*/ + public static String resourceName(HiveCluster hiveCluster) { + return hiveCluster.getMetadata().getName() + "-hiveserver2-config"; + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2DeploymentDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2DeploymentDependent.java new file mode 100644 index 000000000000..1455ca0f7591 --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2DeploymentDependent.java @@ -0,0 +1,237 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hive.kubernetes.operator.dependent; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import io.fabric8.kubernetes.api.model.Container; +import io.fabric8.kubernetes.api.model.ContainerPort; +import io.fabric8.kubernetes.api.model.ContainerPortBuilder; +import io.fabric8.kubernetes.api.model.EnvVar; +import io.fabric8.kubernetes.api.model.Probe; +import io.fabric8.kubernetes.api.model.apps.Deployment; +import io.fabric8.kubernetes.api.model.apps.DeploymentBuilder; +import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.api.config.informer.Informer; +import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; +import org.apache.hive.kubernetes.operator.model.HiveCluster; +import org.apache.hive.kubernetes.operator.model.HiveClusterSpec; +import org.apache.hive.kubernetes.operator.model.spec.HiveServer2Spec; +import org.apache.hive.kubernetes.operator.util.HadoopXmlBuilder; +import org.apache.hive.kubernetes.operator.util.HiveConfigBuilder; +import org.apache.hive.kubernetes.operator.util.Labels; + +/** Manages the Kubernetes Deployment for HiveServer2. 
*/ +@KubernetesDependent( + informer = @Informer(labelSelector = "app.kubernetes.io/component=hiveserver2," + + "app.kubernetes.io/managed-by=hive-kubernetes-operator") +) +public class HiveServer2DeploymentDependent + extends HiveDependentResource { + + public static final String COMPONENT = "hiveserver2"; + private static final String SCRATCH_MOUNT_PATH = "/opt/hive/scratch"; + + public HiveServer2DeploymentDependent() { + super(Deployment.class); + } + + @Override + protected Deployment desired(HiveCluster hiveCluster, + Context context) { + HiveClusterSpec spec = hiveCluster.getSpec(); + HiveServer2Spec hs2 = spec.hiveServer2(); + Map selectorLabels = + Labels.selectorForComponent(hiveCluster, COMPONENT); + + List envVars = new ArrayList<>(); + envVars.add(new EnvVar("SERVICE_NAME", "hiveserver2", null)); + envVars.add(new EnvVar("IS_RESUME", "true", null)); + envVars.add(new EnvVar("TEZ_AM_EXTERNAL_ID", + "tez-session-hs2", null)); + + // User-provided env vars (storage credentials, etc.) + if (spec.envVars() != null) { + envVars.addAll(spec.envVars()); + } + + // Env vars consumed by the Hive Docker entrypoint.sh to + // configure Tez execution mode at container startup. + if (spec.tezAm().isEnabled()) { + envVars.add(new EnvVar("HIVE_SERVER2_TEZ_USE_EXTERNAL_SESSIONS", + "true", null)); + envVars.add(new EnvVar("TEZ_FRAMEWORK_MODE", + "STANDALONE_ZOOKEEPER", null)); + envVars.add(new EnvVar("HIVE_ZOOKEEPER_QUORUM", + spec.zookeeper().quorum(), null)); + } + + if (spec.llap().isEnabled()) { + envVars.add(new EnvVar("HIVE_LLAP_DAEMON_SERVICE_HOSTS", + spec.llap().serviceHosts(), null)); + } + + String metastoreUri = spec.metastore().isEnabled() ? 
+ "thrift://" + hiveCluster.getMetadata().getName() + "-metastore:9083" : + spec.metastore().externalUri(); + StringBuilder serviceOpts = new StringBuilder(); + if (metastoreUri != null && !metastoreUri.isEmpty()) { + serviceOpts.append("-Dhive.metastore.uris=").append(metastoreUri); + } + if (spec.llap().isEnabled()) { + serviceOpts.append(" -Dhive.execution.mode=llap"); + serviceOpts.append(" -Dhive.llap.daemon.service.hosts=") + .append(spec.llap().serviceHosts()); + } + if (spec.tezAm().isEnabled()) { + serviceOpts.append(" -Dhive.zookeeper.quorum=") + .append(spec.zookeeper().quorum()); + } + envVars.add(new EnvVar("SERVICE_OPTS", + serviceOpts.toString(), null)); + + List ports = List.of( + new ContainerPortBuilder() + .withName("thrift") + .withContainerPort(hs2.thriftPort()).build(), + new ContainerPortBuilder() + .withName("webui") + .withContainerPort(hs2.webUiPort()).build() + ); + + Probe readinessProbe = buildTcpProbe(hs2.thriftPort(), hs2.readinessProbe(), 15, 10, 3); + Probe livenessProbe = buildTcpProbe(hs2.thriftPort(), hs2.livenessProbe(), 120, 30, 10); + + boolean tezAmEnabled = spec.tezAm().isEnabled(); + + // Build volume mounts and volumes lists up front so the + // Deployment is constructed in a single builder chain. + // Using editFirstContainer() caused JOSDK SSA comparison + // mismatches that triggered infinite reconciliation loops. 
+ List volumeMounts = + new ArrayList<>(); + volumeMounts.add(new io.fabric8.kubernetes.api.model.VolumeMountBuilder() + .withName("hive-config").withMountPath(CONF_MOUNT_PATH).build()); + + List volumes = + new ArrayList<>(); + volumes.add(buildProjectedConfigVolume("hive-config", + HiveServer2ConfigMapDependent.resourceName(hiveCluster), + HadoopConfigMapDependent.resourceName(hiveCluster))); + + if (tezAmEnabled) { + volumeMounts.add( + new io.fabric8.kubernetes.api.model.VolumeMountBuilder() + .withName("scratch") + .withMountPath(SCRATCH_MOUNT_PATH).build()); + volumes.add(new io.fabric8.kubernetes.api.model.VolumeBuilder() + .withName("scratch") + .withNewPersistentVolumeClaim() + .withClaimName(ScratchPvcDependent + .resourceName(hiveCluster)) + .endPersistentVolumeClaim() + .build()); + } + + List initContainers = new ArrayList<>(); + List allJars = new ArrayList<>(); + if (spec.externalJars() != null) { + allJars.addAll(spec.externalJars()); + } + if (hs2.externalJars() != null) { + allJars.addAll(hs2.externalJars()); + } + addExternalJars(spec.image(), allJars, + initContainers, volumeMounts, volumes, envVars); + replaceConfMountWithSubPaths(volumeMounts, "hive-config", + "hive-site.xml", "tez-site.xml", "core-site.xml"); + + // Pre-compute config hash for the pod template annotation. + // This ensures the Deployment is created with the correct hash + // from the start (single ReplicaSet) and triggers rolling + // updates when ConfigMap content changes. 
+ String configHash = sha256( + HadoopXmlBuilder.buildXml(HiveConfigBuilder.getHiveServer2HiveSite(hiveCluster, spec)), + HadoopXmlBuilder.buildXml(HiveConfigBuilder.getTezSite(spec)), + HadoopXmlBuilder.buildXml(HiveConfigBuilder.getHadoopCoreSite(spec))); + + Deployment deployment = new DeploymentBuilder() + .withNewMetadata() + .withName(resourceName(hiveCluster)) + .withNamespace(hiveCluster.getMetadata().getNamespace()) + .withLabels(Labels.forComponent(hiveCluster, COMPONENT)) + .endMetadata() + .withNewSpec() + .withReplicas(hs2.replicas()) + .withNewSelector() + .withMatchLabels(selectorLabels) + .endSelector() + .withNewTemplate() + .withNewMetadata() + .withLabels(Labels.forComponent(hiveCluster, COMPONENT)) + .addToAnnotations("kubectl.kubernetes.io/default-container", "hiveserver2") + .addToAnnotations("hive.apache.org/config-hash", configHash) + .endMetadata() + .withNewSpec() + .withInitContainers(initContainers) + .addNewContainer() + .withName("hiveserver2") + .withImage(spec.image()) + .withImagePullPolicy(spec.imagePullPolicy()) + .withEnv(envVars) + .withPorts(ports) + .withReadinessProbe(readinessProbe) + .withLivenessProbe(livenessProbe) + .withResources(buildResources(hs2.resources())) + .withVolumeMounts(volumeMounts) + .endContainer() + .withVolumes(volumes) + .endSpec() + .endTemplate() + .endSpec() + .build(); + + applySpreadAffinityIfAbsent( + deployment.getSpec().getTemplate().getSpec(), selectorLabels); + + if (spec.volumes() != null) { + deployment.getSpec().getTemplate().getSpec().getVolumes().addAll(spec.volumes()); + } + if (spec.volumeMounts() != null) { + deployment.getSpec().getTemplate().getSpec().getContainers().get(0).getVolumeMounts() + .addAll(spec.volumeMounts()); + } + if (hs2.extraVolumes() != null) { + deployment.getSpec().getTemplate().getSpec().getVolumes().addAll(hs2.extraVolumes()); + } + if (hs2.extraVolumeMounts() != null) { + deployment.getSpec().getTemplate().getSpec().getContainers().get(0).getVolumeMounts() + 
.addAll(hs2.extraVolumeMounts()); + } + + return deployment; + } + + /** Returns the Deployment resource name for this HiveCluster. */ + public static String resourceName(HiveCluster hiveCluster) { + return hiveCluster.getMetadata().getName() + "-hiveserver2"; + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2ServiceDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2ServiceDependent.java new file mode 100644 index 000000000000..e67ff8289e5f --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2ServiceDependent.java @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hive.kubernetes.operator.dependent; + +import io.fabric8.kubernetes.api.model.IntOrString; +import io.fabric8.kubernetes.api.model.Service; +import io.fabric8.kubernetes.api.model.ServiceBuilder; +import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.api.config.informer.Informer; +import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; +import org.apache.hive.kubernetes.operator.model.HiveCluster; +import org.apache.hive.kubernetes.operator.model.spec.HiveServer2Spec; +import org.apache.hive.kubernetes.operator.util.Labels; + +/** Manages the Kubernetes Service for HiveServer2 (Thrift and WebUI ports). */ +@KubernetesDependent( + informer = @Informer(labelSelector = "app.kubernetes.io/component=hiveserver2," + + "app.kubernetes.io/managed-by=hive-kubernetes-operator") +) +public class HiveServer2ServiceDependent + extends HiveDependentResource { + + public HiveServer2ServiceDependent() { + super(Service.class); + } + + @Override + protected Service desired(HiveCluster hiveCluster, + Context context) { + HiveServer2Spec hs2 = hiveCluster.getSpec().hiveServer2(); + + return new ServiceBuilder() + .withNewMetadata() + .withName(hiveCluster.getMetadata().getName() + "-hiveserver2") + .withNamespace(hiveCluster.getMetadata().getNamespace()) + .withLabels(Labels.forComponent(hiveCluster, + HiveServer2DeploymentDependent.COMPONENT)) + .endMetadata() + .withNewSpec() + .withType(hs2.serviceType()) + .withSelector(Labels.selectorForComponent(hiveCluster, + HiveServer2DeploymentDependent.COMPONENT)) + .addNewPort() + .withName("thrift") + .withPort(hs2.thriftPort()) + .withTargetPort(new IntOrString(hs2.thriftPort())) + .endPort() + .addNewPort() + .withName("webui") + .withPort(hs2.webUiPort()) + .withTargetPort(new IntOrString(hs2.webUiPort())) + .endPort() + .endSpec() + .build(); + } +} diff --git 
a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapConfigMapDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapConfigMapDependent.java new file mode 100644 index 000000000000..2ad6955dadb8 --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapConfigMapDependent.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.dependent; + +import java.util.Map; + +import io.fabric8.kubernetes.api.model.ConfigMap; +import io.fabric8.kubernetes.api.model.ConfigMapBuilder; +import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.api.config.informer.Informer; +import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; +import org.apache.hive.kubernetes.operator.model.HiveCluster; +import org.apache.hive.kubernetes.operator.util.HadoopXmlBuilder; +import org.apache.hive.kubernetes.operator.util.HiveConfigBuilder; +import org.apache.hive.kubernetes.operator.util.Labels; + +/** Manages the llap-daemon-site.xml ConfigMap for LLAP daemons. 
*/ +@KubernetesDependent( + informer = @Informer(labelSelector = "app.kubernetes.io/component=llap," + + "app.kubernetes.io/managed-by=hive-kubernetes-operator") +) +public class LlapConfigMapDependent + extends HiveDependentResource { + + public static final String COMPONENT = "llap"; + + public LlapConfigMapDependent() { + super(ConfigMap.class); + } + + @Override + protected ConfigMap desired(HiveCluster hiveCluster, + Context context) { + Map props = + HiveConfigBuilder.getLlapDaemonSite(hiveCluster.getSpec()); + + return new ConfigMapBuilder() + .withNewMetadata() + .withName(resourceName(hiveCluster)) + .withNamespace(hiveCluster.getMetadata().getNamespace()) + .withLabels(Labels.forComponent(hiveCluster, COMPONENT)) + .endMetadata() + .addToData("llap-daemon-site.xml", + HadoopXmlBuilder.buildXml(props)) + .build(); + } + + /** Returns the ConfigMap resource name for this HiveCluster. */ + public static String resourceName(HiveCluster hiveCluster) { + return hiveCluster.getMetadata().getName() + "-llap-config"; + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapServiceDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapServiceDependent.java new file mode 100644 index 000000000000..108f29347a97 --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapServiceDependent.java @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.dependent; + +import io.fabric8.kubernetes.api.model.IntOrString; +import io.fabric8.kubernetes.api.model.Service; +import io.fabric8.kubernetes.api.model.ServiceBuilder; +import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.api.config.informer.Informer; +import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; +import org.apache.hive.kubernetes.operator.model.HiveCluster; +import org.apache.hive.kubernetes.operator.util.Labels; + +/** + * Manages the headless Kubernetes Service for LLAP daemons. + * Required by the StatefulSet for stable DNS entries and ZooKeeper registration. 
+ */ +@KubernetesDependent( + informer = @Informer(labelSelector = "app.kubernetes.io/component=llap," + + "app.kubernetes.io/managed-by=hive-kubernetes-operator") +) +public class LlapServiceDependent + extends HiveDependentResource { + + public LlapServiceDependent() { + super(Service.class); + } + + @Override + protected Service desired(HiveCluster hiveCluster, + Context context) { + return new ServiceBuilder() + .withNewMetadata() + .withName(hiveCluster.getMetadata().getName() + "-llap") + .withNamespace(hiveCluster.getMetadata().getNamespace()) + .withLabels(Labels.forComponent(hiveCluster, + LlapStatefulSetDependent.COMPONENT)) + .endMetadata() + .withNewSpec() + .withClusterIP("None") + .withSelector(Labels.selectorForComponent(hiveCluster, + LlapStatefulSetDependent.COMPONENT)) + .addNewPort() + .withName("management") + .withPort(15004) + .withTargetPort(new IntOrString(15004)) + .endPort() + .addNewPort() + .withName("shuffle") + .withPort(15551) + .withTargetPort(new IntOrString(15551)) + .endPort() + .addNewPort() + .withName("web") + .withPort(15002) + .withTargetPort(new IntOrString(15002)) + .endPort() + .endSpec() + .build(); + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapStatefulSetDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapStatefulSetDependent.java new file mode 100644 index 000000000000..c8c044d22ce9 --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapStatefulSetDependent.java @@ -0,0 +1,183 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.dependent; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import io.fabric8.kubernetes.api.model.Container; +import io.fabric8.kubernetes.api.model.ContainerPort; +import io.fabric8.kubernetes.api.model.ContainerPortBuilder; +import io.fabric8.kubernetes.api.model.EnvVar; +import io.fabric8.kubernetes.api.model.Probe; +import io.fabric8.kubernetes.api.model.apps.StatefulSet; +import io.fabric8.kubernetes.api.model.apps.StatefulSetBuilder; +import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.api.config.informer.Informer; +import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; +import org.apache.hive.kubernetes.operator.model.HiveCluster; +import org.apache.hive.kubernetes.operator.model.HiveClusterSpec; +import org.apache.hive.kubernetes.operator.model.spec.LlapSpec; +import org.apache.hive.kubernetes.operator.util.HadoopXmlBuilder; +import org.apache.hive.kubernetes.operator.util.HiveConfigBuilder; +import org.apache.hive.kubernetes.operator.util.Labels; + +/** + * Manages the Kubernetes StatefulSet for LLAP daemons. + * Uses StatefulSet for stable pod identities required by ZooKeeper registration. 
+ */ +@KubernetesDependent( + informer = @Informer(labelSelector = "app.kubernetes.io/component=llap," + + "app.kubernetes.io/managed-by=hive-kubernetes-operator") +) +public class LlapStatefulSetDependent + extends HiveDependentResource { + + public static final String COMPONENT = "llap"; + + public LlapStatefulSetDependent() { + super(StatefulSet.class); + } + + @Override + protected StatefulSet desired(HiveCluster hiveCluster, + Context context) { + HiveClusterSpec spec = hiveCluster.getSpec(); + LlapSpec llap = spec.llap(); + Map selectorLabels = + Labels.selectorForComponent(hiveCluster, COMPONENT); + + List envVars = new ArrayList<>(); + envVars.add(new EnvVar("SERVICE_NAME", "llap", null)); + envVars.add(new EnvVar("IS_RESUME", "true", null)); + envVars.add(new EnvVar("LLAP_MEMORY_MB", + String.valueOf(llap.memoryMb()), null)); + envVars.add(new EnvVar("LLAP_EXECUTORS", + String.valueOf(llap.executors()), null)); + envVars.add(new EnvVar("HIVE_ZOOKEEPER_QUORUM", + spec.zookeeper().quorum(), null)); + envVars.add(new EnvVar("HIVE_LLAP_DAEMON_SERVICE_HOSTS", + llap.serviceHosts(), null)); + + // User-provided env vars (storage credentials, etc.) 
+ if (spec.envVars() != null) { + envVars.addAll(spec.envVars()); + } + + List ports = List.of( + new ContainerPortBuilder() + .withName("management").withContainerPort(15004).build(), + new ContainerPortBuilder() + .withName("shuffle").withContainerPort(15551).build(), + new ContainerPortBuilder() + .withName("web").withContainerPort(15002).build(), + new ContainerPortBuilder() + .withName("output").withContainerPort(15003).build() + ); + + Probe readinessProbe = buildTcpProbe(15004, llap.readinessProbe(), 15, 10, 3); + + String headlessServiceName = + hiveCluster.getMetadata().getName() + "-llap"; + + List volumeMounts = + new ArrayList<>(); + volumeMounts.add(new io.fabric8.kubernetes.api.model.VolumeMountBuilder() + .withName("llap-config") + .withMountPath(CONF_MOUNT_PATH).build()); + + List volumes = + new ArrayList<>(); + volumes.add(buildProjectedConfigVolume("llap-config", + LlapConfigMapDependent.resourceName(hiveCluster), + HadoopConfigMapDependent.resourceName(hiveCluster))); + + List initContainers = new ArrayList<>(); + addExternalJars(spec.image(), spec.externalJars(), + initContainers, volumeMounts, volumes, envVars); + replaceConfMountWithSubPaths(volumeMounts, "llap-config", + "llap-daemon-site.xml", "core-site.xml"); + + // Pre-compute config hash for the pod template annotation. 
+ String configHash = sha256( + HadoopXmlBuilder.buildXml(HiveConfigBuilder.getLlapDaemonSite(spec)), + HadoopXmlBuilder.buildXml(HiveConfigBuilder.getHadoopCoreSite(spec))); + + StatefulSet statefulSet = new StatefulSetBuilder() + .withNewMetadata() + .withName(resourceName(hiveCluster)) + .withNamespace(hiveCluster.getMetadata().getNamespace()) + .withLabels(Labels.forComponent(hiveCluster, COMPONENT)) + .endMetadata() + .withNewSpec() + .withReplicas(llap.replicas()) + .withServiceName(headlessServiceName) + .withNewSelector() + .withMatchLabels(selectorLabels) + .endSelector() + .withNewTemplate() + .withNewMetadata() + .withLabels(Labels.forComponent(hiveCluster, COMPONENT)) + .addToAnnotations("kubectl.kubernetes.io/default-container", "llap") + .addToAnnotations("hive.apache.org/config-hash", configHash) + .endMetadata() + .withNewSpec() + .withInitContainers(initContainers) + .addNewContainer() + .withName("llap") + .withImage(spec.image()) + .withImagePullPolicy(spec.imagePullPolicy()) + .withEnv(envVars) + .withPorts(ports) + .withReadinessProbe(readinessProbe) + .withResources(buildResources(llap.resources())) + .withVolumeMounts(volumeMounts) + .endContainer() + .withVolumes(volumes) + .endSpec() + .endTemplate() + .endSpec() + .build(); + + applySpreadAffinityIfAbsent( + statefulSet.getSpec().getTemplate().getSpec(), selectorLabels); + + if (spec.volumes() != null) { + statefulSet.getSpec().getTemplate().getSpec().getVolumes().addAll(spec.volumes()); + } + if (spec.volumeMounts() != null) { + statefulSet.getSpec().getTemplate().getSpec().getContainers().get(0).getVolumeMounts() + .addAll(spec.volumeMounts()); + } + if (llap.extraVolumes() != null) { + statefulSet.getSpec().getTemplate().getSpec().getVolumes().addAll(llap.extraVolumes()); + } + if (llap.extraVolumeMounts() != null) { + statefulSet.getSpec().getTemplate().getSpec().getContainers().get(0).getVolumeMounts() + .addAll(llap.extraVolumeMounts()); + } + return statefulSet; + } + + /** Returns 
the StatefulSet resource name for this HiveCluster. */ + public static String resourceName(HiveCluster hiveCluster) { + return hiveCluster.getMetadata().getName() + "-llap"; + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreConfigMapDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreConfigMapDependent.java new file mode 100644 index 000000000000..b429335f76e0 --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreConfigMapDependent.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hive.kubernetes.operator.dependent; + +import java.util.Map; + +import io.fabric8.kubernetes.api.model.ConfigMap; +import io.fabric8.kubernetes.api.model.ConfigMapBuilder; +import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.api.config.informer.Informer; +import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; +import org.apache.hive.kubernetes.operator.model.HiveCluster; +import org.apache.hive.kubernetes.operator.util.HadoopXmlBuilder; +import org.apache.hive.kubernetes.operator.util.HiveConfigBuilder; +import org.apache.hive.kubernetes.operator.util.Labels; + +/** Manages the metastore-site.xml ConfigMap for the Hive Metastore. */ +@KubernetesDependent( + informer = @Informer(labelSelector = "app.kubernetes.io/component=metastore," + + "app.kubernetes.io/managed-by=hive-kubernetes-operator") +) +public class MetastoreConfigMapDependent + extends HiveDependentResource { + + public static final String COMPONENT = "metastore"; + + public MetastoreConfigMapDependent() { + super(ConfigMap.class); + } + + @Override + protected ConfigMap desired(HiveCluster hiveCluster, + Context context) { + Map props = + HiveConfigBuilder.getMetastoreSite(hiveCluster.getSpec()); + + return new ConfigMapBuilder() + .withNewMetadata() + .withName(resourceName(hiveCluster)) + .withNamespace(hiveCluster.getMetadata().getNamespace()) + .withLabels(Labels.forComponent(hiveCluster, COMPONENT)) + .endMetadata() + .addToData("metastore-site.xml", HadoopXmlBuilder.buildXml(props)) + .build(); + } + + /** Returns the ConfigMap resource name for this HiveCluster. 
*/ + public static String resourceName(HiveCluster hiveCluster) { + return hiveCluster.getMetadata().getName() + "-metastore-config"; + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreDeploymentDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreDeploymentDependent.java new file mode 100644 index 000000000000..e40b9add097c --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreDeploymentDependent.java @@ -0,0 +1,173 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
package org.apache.hive.kubernetes.operator.dependent;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import io.fabric8.kubernetes.api.model.Container;
import io.fabric8.kubernetes.api.model.ContainerPort;
import io.fabric8.kubernetes.api.model.ContainerPortBuilder;
import io.fabric8.kubernetes.api.model.EnvVar;
import io.fabric8.kubernetes.api.model.Probe;
import io.fabric8.kubernetes.api.model.Volume;
import io.fabric8.kubernetes.api.model.VolumeMount;
import io.fabric8.kubernetes.api.model.apps.Deployment;
import io.fabric8.kubernetes.api.model.apps.DeploymentBuilder;
import io.javaoperatorsdk.operator.api.reconciler.Context;
import io.javaoperatorsdk.operator.api.config.informer.Informer;
import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent;
import org.apache.hive.kubernetes.operator.model.HiveCluster;
import org.apache.hive.kubernetes.operator.model.HiveClusterSpec;
import org.apache.hive.kubernetes.operator.model.spec.DatabaseConfig;
import org.apache.hive.kubernetes.operator.util.HadoopXmlBuilder;
import org.apache.hive.kubernetes.operator.util.HiveConfigBuilder;
import org.apache.hive.kubernetes.operator.util.Labels;

/** Manages the Kubernetes Deployment for the Hive Metastore. */
@KubernetesDependent(
    informer = @Informer(labelSelector = "app.kubernetes.io/component=metastore,"
        + "app.kubernetes.io/managed-by=hive-kubernetes-operator")
)
public class MetastoreDeploymentDependent
    extends HiveDependentResource<Deployment> {

  public static final String COMPONENT = "metastore";

  /** Name of the single application container in the pod template. */
  private static final String CONTAINER_NAME = "metastore";
  /** Container port exposed under the name {@code thrift}; also the probe target. */
  private static final int THRIFT_PORT = 9083;
  /** Container port exposed under the name {@code rest}. */
  private static final int REST_PORT = 9001;

  public MetastoreDeploymentDependent() {
    super(Deployment.class);
  }

  /**
   * Builds the desired Metastore Deployment for the given cluster.
   *
   * <p>The pod template carries a {@code hive.apache.org/config-hash}
   * annotation computed from the rendered metastore-site.xml and
   * core-site.xml, so a change in either produces a new pod template.
   *
   * @param hiveCluster the primary resource
   * @param context the reconciliation context (not used here)
   * @return the Deployment named by {@link #resourceName(HiveCluster)}
   */
  @Override
  protected Deployment desired(HiveCluster hiveCluster,
      Context<HiveCluster> context) {
    HiveClusterSpec spec = hiveCluster.getSpec();
    DatabaseConfig db = spec.metastore().database();
    Map<String, String> selectorLabels =
        Labels.selectorForComponent(hiveCluster, COMPONENT);

    // Operator-defined variables first, user-provided ones last.
    List<EnvVar> envVars = new ArrayList<>();
    envVars.add(new EnvVar("SERVICE_NAME", "metastore", null));
    envVars.add(new EnvVar("IS_RESUME", "true", null));
    envVars.addAll(buildDbEnvVars(db));
    if (spec.envVars() != null) {
      envVars.addAll(spec.envVars());
    }

    List<ContainerPort> ports = List.of(
        new ContainerPortBuilder()
            .withName("thrift").withContainerPort(THRIFT_PORT).build(),
        new ContainerPortBuilder()
            .withName("rest").withContainerPort(REST_PORT).build());

    // Both probes are TCP checks against the Thrift port.
    Probe readinessProbe =
        buildTcpProbe(THRIFT_PORT, spec.metastore().readinessProbe(), 15, 10, 3);
    Probe livenessProbe =
        buildTcpProbe(THRIFT_PORT, spec.metastore().livenessProbe(), 60, 30, 5);

    List<Container> initContainers = new ArrayList<>();
    List<VolumeMount> volumeMounts = new ArrayList<>();
    List<Volume> volumes = new ArrayList<>();
    buildMetastoreVolumes(hiveCluster, volumeMounts, volumes);

    // Merge JDBC driver JAR with global externalJars into one list
    List<String> allJars = new ArrayList<>();
    if (db.driverJarUrl() != null) {
      allJars.add(db.driverJarUrl());
    }
    if (spec.externalJars() != null) {
      allJars.addAll(spec.externalJars());
    }
    addExternalJars(spec.image(), allJars,
        initContainers, volumeMounts, volumes, envVars);
    // Replace directory mount with subPath mounts to avoid
    // broken symlinks from K8s ConfigMap rotation.
    replaceConfMountWithSubPaths(volumeMounts, "hive-config",
        "metastore-site.xml", "core-site.xml");

    // User-supplied volumes and mounts are appended after the
    // operator-managed ones: cluster-wide entries first, then the
    // metastore-specific extras. Adding them before the build avoids
    // reaching back into the built object via getContainers().get(0).
    if (spec.volumes() != null) {
      volumes.addAll(spec.volumes());
    }
    if (spec.volumeMounts() != null) {
      volumeMounts.addAll(spec.volumeMounts());
    }
    if (spec.metastore().extraVolumes() != null) {
      volumes.addAll(spec.metastore().extraVolumes());
    }
    if (spec.metastore().extraVolumeMounts() != null) {
      volumeMounts.addAll(spec.metastore().extraVolumeMounts());
    }

    // Pre-compute config hash for the pod template annotation.
    // This ensures the Deployment is created with the correct hash
    // from the start (single ReplicaSet) and triggers rolling
    // updates when ConfigMap content changes.
    String configHash = sha256(
        HadoopXmlBuilder.buildXml(HiveConfigBuilder.getMetastoreSite(spec)),
        HadoopXmlBuilder.buildXml(HiveConfigBuilder.getHadoopCoreSite(spec)));

    Deployment deployment = new DeploymentBuilder()
        .withNewMetadata()
          .withName(resourceName(hiveCluster))
          .withNamespace(hiveCluster.getMetadata().getNamespace())
          .withLabels(Labels.forComponent(hiveCluster, COMPONENT))
        .endMetadata()
        .withNewSpec()
          .withReplicas(spec.metastore().replicas())
          .withNewSelector()
            .withMatchLabels(selectorLabels)
          .endSelector()
          .withNewTemplate()
            .withNewMetadata()
              .withLabels(Labels.forComponent(hiveCluster, COMPONENT))
              .addToAnnotations("kubectl.kubernetes.io/default-container", CONTAINER_NAME)
              .addToAnnotations("hive.apache.org/config-hash", configHash)
            .endMetadata()
            .withNewSpec()
              .withInitContainers(initContainers)
              .addNewContainer()
                .withName(CONTAINER_NAME)
                .withImage(spec.image())
                .withImagePullPolicy(spec.imagePullPolicy())
                .withEnv(envVars)
                .withPorts(ports)
                .withReadinessProbe(readinessProbe)
                .withLivenessProbe(livenessProbe)
                .withResources(buildResources(
                    spec.metastore().resources()))
                .withVolumeMounts(volumeMounts)
              .endContainer()
              .withVolumes(volumes)
            .endSpec()
          .endTemplate()
        .endSpec()
        .build();

    applySpreadAffinityIfAbsent(
        deployment.getSpec().getTemplate().getSpec(), selectorLabels);
    return deployment;
  }

  /** Returns the Deployment resource name for this HiveCluster. */
  public static String resourceName(HiveCluster hiveCluster) {
    return hiveCluster.getMetadata().getName() + "-metastore";
  }
}
+ */ + +package org.apache.hive.kubernetes.operator.dependent; + +import io.fabric8.kubernetes.api.model.IntOrString; +import io.fabric8.kubernetes.api.model.Service; +import io.fabric8.kubernetes.api.model.ServiceBuilder; +import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.api.config.informer.Informer; +import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; +import org.apache.hive.kubernetes.operator.model.HiveCluster; +import org.apache.hive.kubernetes.operator.util.Labels; + +/** Manages the Kubernetes Service for the Hive Metastore (Thrift + REST ports). */ +@KubernetesDependent( + informer = @Informer(labelSelector = "app.kubernetes.io/component=metastore," + + "app.kubernetes.io/managed-by=hive-kubernetes-operator") +) +public class MetastoreServiceDependent + extends HiveDependentResource { + + public MetastoreServiceDependent() { + super(Service.class); + } + + @Override + protected Service desired(HiveCluster hiveCluster, + Context context) { + return new ServiceBuilder() + .withNewMetadata() + .withName(hiveCluster.getMetadata().getName() + "-metastore") + .withNamespace(hiveCluster.getMetadata().getNamespace()) + .withLabels(Labels.forComponent(hiveCluster, + MetastoreDeploymentDependent.COMPONENT)) + .endMetadata() + .withNewSpec() + .withType("ClusterIP") + .withSelector(Labels.selectorForComponent(hiveCluster, + MetastoreDeploymentDependent.COMPONENT)) + .addNewPort() + .withName("thrift") + .withPort(9083) + .withTargetPort(new IntOrString(9083)) + .endPort() + .addNewPort() + .withName("rest") + .withPort(9001) + .withTargetPort(new IntOrString(9001)) + .endPort() + .endSpec() + .build(); + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/SchemaInitJobDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/SchemaInitJobDependent.java new file mode 100644 index 
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hive.kubernetes.operator.dependent;

import java.util.ArrayList;
import java.util.List;

import io.fabric8.kubernetes.api.model.Container;
import io.fabric8.kubernetes.api.model.EnvVar;
import io.fabric8.kubernetes.api.model.Volume;
import io.fabric8.kubernetes.api.model.VolumeMount;
import io.fabric8.kubernetes.api.model.batch.v1.Job;
import io.fabric8.kubernetes.api.model.batch.v1.JobBuilder;
import io.javaoperatorsdk.operator.api.reconciler.Context;
import io.javaoperatorsdk.operator.api.config.informer.Informer;
import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent;
import org.apache.hive.kubernetes.operator.model.HiveCluster;
import org.apache.hive.kubernetes.operator.model.HiveClusterSpec;
import org.apache.hive.kubernetes.operator.model.spec.DatabaseConfig;
import org.apache.hive.kubernetes.operator.model.spec.SecretKeyRef;
import org.apache.hive.kubernetes.operator.util.Labels;

/**
 * Manages the Kubernetes Job that initializes or upgrades the Hive Metastore
 * database schema using schematool.
 */
@KubernetesDependent(
    informer = @Informer(labelSelector = "app.kubernetes.io/component=schema-init,"
        + "app.kubernetes.io/managed-by=hive-kubernetes-operator")
)
public class SchemaInitJobDependent
    extends HiveDependentResource<Job> {

  public static final String COMPONENT = "schema-init";

  public SchemaInitJobDependent() {
    super(Job.class);
  }

  /**
   * Builds the desired schema-init Job: a single {@code schema-init}
   * container running a bash command that invokes schematool with
   * {@code -initOrUpgradeSchema}.
   *
   * @param hiveCluster the primary resource
   * @param context the reconciliation context (not used here)
   * @return the Job named by {@link #resourceName(HiveCluster)}
   */
  @Override
  protected Job desired(HiveCluster hiveCluster,
      Context<HiveCluster> context) {
    HiveClusterSpec spec = hiveCluster.getSpec();
    DatabaseConfig db = spec.metastore().database();
    boolean hasDriverJar = db.driverJarUrl() != null;

    List<EnvVar> envVars = new ArrayList<>();
    envVars.add(new EnvVar("SERVICE_NAME", "metastore", null));
    envVars.add(new EnvVar("IS_RESUME", "false", null));
    envVars.add(new EnvVar("HIVE_CUSTOM_CONF_DIR",
        CONF_MOUNT_PATH, null));
    envVars.addAll(buildDbEnvVars(db));

    String schemaCommand = buildSchemaCommand(db, hasDriverJar);

    List<Container> initContainers = new ArrayList<>();
    List<VolumeMount> volumeMounts = new ArrayList<>();
    List<Volume> volumes = new ArrayList<>();
    buildMetastoreVolumes(hiveCluster, volumeMounts, volumes);

    // Schema init needs the JDBC driver JAR
    List<String> jars = new ArrayList<>();
    if (hasDriverJar) {
      jars.add(db.driverJarUrl());
    }
    addExternalJars(spec.image(), jars,
        initContainers, volumeMounts, volumes, envVars);

    return new JobBuilder()
        .withNewMetadata()
          .withName(resourceName(hiveCluster))
          .withNamespace(hiveCluster.getMetadata().getNamespace())
          .withLabels(Labels.forComponent(hiveCluster, COMPONENT))
        .endMetadata()
        .withNewSpec()
          .withBackoffLimit(3)
          .withNewTemplate()
            .withNewMetadata()
              .withLabels(Labels.forComponent(
                  hiveCluster, COMPONENT))
            .endMetadata()
            .withNewSpec()
              .withRestartPolicy("OnFailure")
              .withInitContainers(initContainers)
              .addNewContainer()
                .withName("schema-init")
                .withImage(spec.image())
                .withImagePullPolicy(spec.imagePullPolicy())
                .withCommand("/bin/bash", "-c")
                .withArgs(schemaCommand)
                .withEnv(envVars)
                .withVolumeMounts(volumeMounts)
              .endContainer()
              .withVolumes(volumes)
            .endSpec()
          .endTemplate()
        .endSpec()
        .build();
  }

  /**
   * Assembles the bash command line executed by the schema-init container.
   *
   * <p>This Job runs schematool directly (not via the entrypoint), so the
   * command replicates the entrypoint's config setup:
   * <ol>
   *   <li>Symlink custom config files into HIVE_CONF_DIR</li>
   *   <li>Set HADOOP_CLIENT_OPTS to pass SERVICE_OPTS as JVM args</li>
   *   <li>Copy JDBC driver jar if downloaded by init container</li>
   * </ol>
   *
   * @param db the metastore database settings (type and optional password ref)
   * @param hasDriverJar whether a driver JAR URL is configured, i.e. whether
   *     the copy step must be included
   * @return the {@code &&}-chained shell command
   */
  private static String buildSchemaCommand(DatabaseConfig db, boolean hasDriverJar) {
    SecretKeyRef passwordRef = db.passwordSecretRef();

    StringBuilder cmd = new StringBuilder();
    cmd.append("export HIVE_CONF_DIR=$HIVE_HOME/conf && ");
    cmd.append("if [ -d \"${HIVE_CUSTOM_CONF_DIR:-}\" ]; then ");
    cmd.append("find \"${HIVE_CUSTOM_CONF_DIR}\" -type f -exec ");
    cmd.append("ln -sfn {} \"${HIVE_CONF_DIR}\"/ \\; ; ");
    cmd.append("export HADOOP_CONF_DIR=$HIVE_CONF_DIR; fi && ");
    cmd.append("export HADOOP_CLIENT_OPTS="
        + "\"${HADOOP_CLIENT_OPTS:-} -Xmx1G ${SERVICE_OPTS:-}\" && ");
    if (hasDriverJar) {
      cmd.append("cp ").append(EXT_JARS_PATH)
          .append("/*.jar $HIVE_HOME/lib/ && ");
    }
    cmd.append("$HIVE_HOME/bin/schematool -dbType ")
        .append(db.type())
        .append(" -initOrUpgradeSchema");
    if (passwordRef != null) {
      // The password itself is read from the DBPASSWORD environment
      // variable at run time; only the variable reference is embedded.
      cmd.append(" -passWord \"$DBPASSWORD\"");
    }
    return cmd.toString();
  }

  /** Returns the Job resource name for this HiveCluster. */
  public static String resourceName(HiveCluster hiveCluster) {
    return hiveCluster.getMetadata().getName() + "-schema-init";
  }
}
package org.apache.hive.kubernetes.operator.dependent;

import java.util.List;

import io.fabric8.kubernetes.api.model.PersistentVolumeClaim;
import io.fabric8.kubernetes.api.model.PersistentVolumeClaimBuilder;
import io.fabric8.kubernetes.api.model.Quantity;
import io.javaoperatorsdk.operator.api.reconciler.Context;
import io.javaoperatorsdk.operator.api.config.informer.Informer;
import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent;
import org.apache.hive.kubernetes.operator.model.HiveCluster;
import org.apache.hive.kubernetes.operator.model.spec.TezAmSpec;
import org.apache.hive.kubernetes.operator.util.Labels;

/**
 * Manages the shared scratch PersistentVolumeClaim mounted by both
 * HiveServer2 and TezAM at /opt/hive/scratch.
 *
 * <p>This mirrors the Docker Compose pattern where a named volume
 * {@code scratch:/opt/hive/scratch} is shared between the hs2 and
 * tezam containers so that the {@code dummy_path} written by HS2
 * (for VALUES clause) is accessible by the TezAM.
 *
 * <p>The PVC uses ReadWriteMany access mode so both pods can mount it
 * simultaneously.
 */
@KubernetesDependent(
    informer = @Informer(labelSelector = "app.kubernetes.io/component=scratch,"
        + "app.kubernetes.io/managed-by=hive-kubernetes-operator")
)
public class ScratchPvcDependent
    extends HiveDependentResource<PersistentVolumeClaim> {

  public static final String COMPONENT = "scratch";

  public ScratchPvcDependent() {
    super(PersistentVolumeClaim.class);
  }

  /**
   * Builds the desired scratch PVC, sized from
   * {@code spec.tezAm.scratchStorageSize}.
   *
   * @param hiveCluster the primary resource
   * @param context the reconciliation context (not used here)
   * @return the PVC named by {@link #resourceName(HiveCluster)}
   */
  @Override
  protected PersistentVolumeClaim desired(HiveCluster hiveCluster,
      Context<HiveCluster> context) {
    TezAmSpec tezAm = hiveCluster.getSpec().tezAm();

    return new PersistentVolumeClaimBuilder()
        .withNewMetadata()
          .withName(resourceName(hiveCluster))
          .withNamespace(hiveCluster.getMetadata().getNamespace())
          .withLabels(Labels.forComponent(hiveCluster, COMPONENT))
        .endMetadata()
        .withNewSpec()
          .withAccessModes(List.of("ReadWriteMany"))
          // A null class name leaves the field unset, same as not
          // calling the setter at all.
          .withStorageClassName(tezAm.scratchStorageClassName())
          .withNewResources()
            .addToRequests("storage",
                new Quantity(tezAm.scratchStorageSize()))
          .endResources()
        .endSpec()
        .build();
  }

  /** Returns the PVC resource name for this HiveCluster. */
  public static String resourceName(HiveCluster hiveCluster) {
    return hiveCluster.getMetadata().getName() + "-scratch";
  }
}
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.dependent; + +import io.fabric8.kubernetes.api.model.Service; +import io.fabric8.kubernetes.api.model.ServiceBuilder; +import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.api.config.informer.Informer; +import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; +import org.apache.hive.kubernetes.operator.model.HiveCluster; +import org.apache.hive.kubernetes.operator.util.Labels; + +/** + * Manages the headless Kubernetes Service for Tez Application Master. + * Required by the StatefulSet for stable DNS entries so that + * HiveServer2 can resolve TezAM pod hostnames for RPC communication. 
+ */ +@KubernetesDependent( + informer = @Informer(labelSelector = "app.kubernetes.io/component=tezam," + + "app.kubernetes.io/managed-by=hive-kubernetes-operator") +) +public class TezAmServiceDependent + extends HiveDependentResource { + + public TezAmServiceDependent() { + super(Service.class); + } + + @Override + protected Service desired(HiveCluster hiveCluster, + Context context) { + return new ServiceBuilder() + .withNewMetadata() + .withName(hiveCluster.getMetadata().getName() + "-tezam") + .withNamespace(hiveCluster.getMetadata().getNamespace()) + .withLabels(Labels.forComponent(hiveCluster, + TezAmStatefulSetDependent.COMPONENT)) + .endMetadata() + .withNewSpec() + .withClusterIP("None") + .withSelector(Labels.selectorForComponent(hiveCluster, + TezAmStatefulSetDependent.COMPONENT)) + .endSpec() + .build(); + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/TezAmStatefulSetDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/TezAmStatefulSetDependent.java new file mode 100644 index 000000000000..5cc7a3f800f3 --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/TezAmStatefulSetDependent.java @@ -0,0 +1,181 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
package org.apache.hive.kubernetes.operator.dependent;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import io.fabric8.kubernetes.api.model.Container;
import io.fabric8.kubernetes.api.model.EnvVar;
import io.fabric8.kubernetes.api.model.apps.StatefulSet;
import io.fabric8.kubernetes.api.model.apps.StatefulSetBuilder;
import io.javaoperatorsdk.operator.api.reconciler.Context;
import io.javaoperatorsdk.operator.api.config.informer.Informer;
import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent;
import org.apache.hive.kubernetes.operator.model.HiveCluster;
import org.apache.hive.kubernetes.operator.model.HiveClusterSpec;
import org.apache.hive.kubernetes.operator.model.spec.TezAmSpec;
import org.apache.hive.kubernetes.operator.util.HadoopXmlBuilder;
import org.apache.hive.kubernetes.operator.util.HiveConfigBuilder;
import org.apache.hive.kubernetes.operator.util.Labels;

/**
 * Manages the Kubernetes StatefulSet for the Tez Application Master.
 * Uses StatefulSet (with a headless Service) so that each TezAM pod
 * gets a stable, DNS-resolvable hostname. HiveServer2 discovers
 * TezAM pods via ZooKeeper and connects over RPC using the hostname,
 * so the hostname must be resolvable within the cluster.
 */
@KubernetesDependent(
    informer = @Informer(labelSelector = "app.kubernetes.io/component=tezam,"
        + "app.kubernetes.io/managed-by=hive-kubernetes-operator")
)
public class TezAmStatefulSetDependent
    extends HiveDependentResource<StatefulSet> {

  public static final String COMPONENT = "tezam";
  private static final String SCRATCH_MOUNT_PATH = "/opt/hive/scratch";

  /** Name of the single application container in the pod template. */
  private static final String CONTAINER_NAME = "tezam";

  public TezAmStatefulSetDependent() {
    super(StatefulSet.class);
  }

  /**
   * Builds the desired TezAM StatefulSet for the given cluster.
   *
   * <p>The pod mounts the projected {@code hive-config} volume (as subPath
   * mounts) and the shared scratch PVC. The pod template carries a
   * {@code hive.apache.org/config-hash} annotation computed from the
   * rendered hive-site.xml, tez-site.xml and core-site.xml.
   *
   * @param hiveCluster the primary resource
   * @param context the reconciliation context (not used here)
   * @return the StatefulSet named by {@link #resourceName(HiveCluster)}
   */
  @Override
  protected StatefulSet desired(HiveCluster hiveCluster,
      Context<HiveCluster> context) {
    HiveClusterSpec spec = hiveCluster.getSpec();
    TezAmSpec tezAm = spec.tezAm();
    Map<String, String> selectorLabels =
        Labels.selectorForComponent(hiveCluster, COMPONENT);

    List<EnvVar> envVars = new ArrayList<>();
    envVars.add(new EnvVar("SERVICE_NAME", "tezam", null));
    envVars.add(new EnvVar("IS_RESUME", "true", null));
    envVars.add(new EnvVar("HIVE_ZOOKEEPER_QUORUM",
        spec.zookeeper().quorum(), null));
    envVars.add(new EnvVar("TEZ_FRAMEWORK_MODE",
        "STANDALONE_ZOOKEEPER", null));

    if (spec.llap().isEnabled()) {
      envVars.add(new EnvVar("HIVE_LLAP_DAEMON_SERVICE_HOSTS",
          spec.llap().serviceHosts(), null));
    }

    // User-provided env vars (storage credentials, etc.)
    if (spec.envVars() != null) {
      envVars.addAll(spec.envVars());
    }

    // The governing headless Service (see TezAmServiceDependent) is named
    // "<cluster>-tezam", which is also this StatefulSet's own name.
    String headlessServiceName = resourceName(hiveCluster);

    List<io.fabric8.kubernetes.api.model.VolumeMount> volumeMounts =
        new ArrayList<>();
    volumeMounts.add(new io.fabric8.kubernetes.api.model.VolumeMountBuilder()
        .withName("hive-config")
        .withMountPath(CONF_MOUNT_PATH).build());
    volumeMounts.add(new io.fabric8.kubernetes.api.model.VolumeMountBuilder()
        .withName("scratch")
        .withMountPath(SCRATCH_MOUNT_PATH).build());

    List<io.fabric8.kubernetes.api.model.Volume> volumes =
        new ArrayList<>();
    volumes.add(buildProjectedConfigVolume("hive-config",
        HiveServer2ConfigMapDependent.resourceName(hiveCluster),
        HadoopConfigMapDependent.resourceName(hiveCluster)));
    volumes.add(new io.fabric8.kubernetes.api.model.VolumeBuilder()
        .withName("scratch")
        .withNewPersistentVolumeClaim()
          .withClaimName(ScratchPvcDependent.resourceName(hiveCluster))
        .endPersistentVolumeClaim()
        .build());

    List<Container> initContainers = new ArrayList<>();
    addExternalJars(spec.image(), spec.externalJars(),
        initContainers, volumeMounts, volumes, envVars);
    replaceConfMountWithSubPaths(volumeMounts, "hive-config",
        "hive-site.xml", "tez-site.xml", "core-site.xml");

    // User-supplied volumes and mounts are appended after the
    // operator-managed ones: cluster-wide entries first, then the
    // TezAM-specific extras. Adding them before the build avoids
    // reaching back into the built object via getContainers().get(0).
    if (spec.volumes() != null) {
      volumes.addAll(spec.volumes());
    }
    if (spec.volumeMounts() != null) {
      volumeMounts.addAll(spec.volumeMounts());
    }
    if (tezAm.extraVolumes() != null) {
      volumes.addAll(tezAm.extraVolumes());
    }
    if (tezAm.extraVolumeMounts() != null) {
      volumeMounts.addAll(tezAm.extraVolumeMounts());
    }

    // Pre-compute config hash for the pod template annotation.
    // TezAM uses the same ConfigMaps as HS2 (hive-site.xml + tez-site.xml + core-site.xml).
    String configHash = sha256(
        HadoopXmlBuilder.buildXml(HiveConfigBuilder.getHiveServer2HiveSite(hiveCluster, spec)),
        HadoopXmlBuilder.buildXml(HiveConfigBuilder.getTezSite(spec)),
        HadoopXmlBuilder.buildXml(HiveConfigBuilder.getHadoopCoreSite(spec)));

    StatefulSet statefulSet = new StatefulSetBuilder()
        .withNewMetadata()
          .withName(resourceName(hiveCluster))
          .withNamespace(hiveCluster.getMetadata().getNamespace())
          .withLabels(Labels.forComponent(hiveCluster, COMPONENT))
        .endMetadata()
        .withNewSpec()
          .withReplicas(tezAm.replicas())
          .withServiceName(headlessServiceName)
          .withNewSelector()
            .withMatchLabels(selectorLabels)
          .endSelector()
          .withNewTemplate()
            .withNewMetadata()
              .withLabels(Labels.forComponent(hiveCluster, COMPONENT))
              .addToAnnotations("kubectl.kubernetes.io/default-container", CONTAINER_NAME)
              .addToAnnotations("hive.apache.org/config-hash", configHash)
            .endMetadata()
            .withNewSpec()
              .withInitContainers(initContainers)
              .addNewContainer()
                .withName(CONTAINER_NAME)
                .withImage(spec.image())
                .withImagePullPolicy(spec.imagePullPolicy())
                .withEnv(envVars)
                .withResources(buildResources(tezAm.resources()))
                .withVolumeMounts(volumeMounts)
              .endContainer()
              .withVolumes(volumes)
            .endSpec()
          .endTemplate()
        .endSpec()
        .build();

    applySpreadAffinityIfAbsent(
        statefulSet.getSpec().getTemplate().getSpec(), selectorLabels);
    return statefulSet;
  }

  /** Returns the StatefulSet resource name for this HiveCluster. */
  public static String resourceName(HiveCluster hiveCluster) {
    return hiveCluster.getMetadata().getName() + "-tezam";
  }
}
+ * If Metastore is external, proceed immediately. + * If managed, wait for Metastore pods to be ready. + */ +public class HiveServer2Precondition implements Condition { + + @Override + public boolean isMet( + DependentResource dependentResource, + HiveCluster primary, + Context context) { + + if (!primary.getSpec().metastore().isEnabled()) { + return true; + } + + int desiredReplicas = primary.getSpec().metastore().replicas(); + return context.getSecondaryResources(Deployment.class).stream() + .filter(d -> d.getMetadata().getName().equals(primary.getMetadata().getName() + "-metastore")) + .findFirst() + .map(deployment -> deployment.getStatus() != null + && deployment.getStatus().getReadyReplicas() != null + && deployment.getStatus().getReadyReplicas() >= desiredReplicas) + .orElse(false); + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/LlapEnabledCondition.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/LlapEnabledCondition.java new file mode 100644 index 000000000000..a113c50efbff --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/LlapEnabledCondition.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.dependent.condition; + +import io.fabric8.kubernetes.api.model.HasMetadata; +import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.api.reconciler.dependent.DependentResource; +import io.javaoperatorsdk.operator.processing.dependent.workflow.Condition; +import org.apache.hive.kubernetes.operator.model.HiveCluster; + +/** + * Activation condition for LLAP dependent resources. + * Returns true only when spec.llap.enabled is true. + */ +public class LlapEnabledCondition + implements Condition { + + @Override + public boolean isMet( + DependentResource dependentResource, + HiveCluster primary, + Context context) { + return primary.getSpec().llap().isEnabled(); + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/MetastoreEnabledCondition.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/MetastoreEnabledCondition.java new file mode 100644 index 000000000000..b1cb4139ac96 --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/MetastoreEnabledCondition.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.dependent.condition; + +import io.fabric8.kubernetes.api.model.HasMetadata; +import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.api.reconciler.dependent.DependentResource; +import io.javaoperatorsdk.operator.processing.dependent.workflow.Condition; +import org.apache.hive.kubernetes.operator.model.HiveCluster; + +/** + * Activation condition for Metastore dependent resources. + * Returns true only when spec.metastore.enabled is true. + */ +public class MetastoreEnabledCondition implements Condition { + @Override + public boolean isMet( + DependentResource dependentResource, + HiveCluster primary, + Context context) { + return primary.getSpec().metastore().isEnabled(); + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/MetastoreReadyCondition.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/MetastoreReadyCondition.java new file mode 100644 index 000000000000..7b3169f32043 --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/MetastoreReadyCondition.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.dependent.condition; + +import io.fabric8.kubernetes.api.model.apps.Deployment; +import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.api.reconciler.dependent.DependentResource; +import io.javaoperatorsdk.operator.processing.dependent.workflow.Condition; +import org.apache.hive.kubernetes.operator.model.HiveCluster; + +/** + * Ready condition that checks whether the Metastore Deployment has the + * desired number of ready replicas. Used to gate HiveServer2 Deployment. 
+ */ +public class MetastoreReadyCondition + implements Condition { + + @Override + public boolean isMet( + DependentResource dependentResource, + HiveCluster primary, + Context context) { + if (!primary.getSpec().metastore().isEnabled()) { + return true; + } + int desiredReplicas = primary.getSpec().metastore().replicas(); + return dependentResource.getSecondaryResource(primary, context) + .map(deployment -> deployment.getStatus() != null + && deployment.getStatus().getReadyReplicas() != null + && deployment.getStatus().getReadyReplicas() >= desiredReplicas) + .orElse(false); + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/SchemaJobCompletedCondition.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/SchemaJobCompletedCondition.java new file mode 100644 index 000000000000..1b0b44318596 --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/SchemaJobCompletedCondition.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hive.kubernetes.operator.dependent.condition; + +import io.fabric8.kubernetes.api.model.batch.v1.Job; +import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.api.reconciler.dependent.DependentResource; +import io.javaoperatorsdk.operator.processing.dependent.workflow.Condition; +import org.apache.hive.kubernetes.operator.model.HiveCluster; + +/** + * Ready condition that checks whether the schema initialization Job + * has completed successfully. Used to gate Metastore Deployment creation. + */ +public class SchemaJobCompletedCondition + implements Condition { + + @Override + public boolean isMet( + DependentResource dependentResource, + HiveCluster primary, + Context context) { + if (!primary.getSpec().metastore().isEnabled()) { + return true; + } + return dependentResource.getSecondaryResource(primary, context) + .map(job -> job.getStatus() != null + && job.getStatus().getSucceeded() != null + && job.getStatus().getSucceeded() >= 1) + .orElse(false); + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/TezAmEnabledCondition.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/TezAmEnabledCondition.java new file mode 100644 index 000000000000..85ae7e45dbdb --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/TezAmEnabledCondition.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.dependent.condition; + +import io.fabric8.kubernetes.api.model.HasMetadata; +import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.api.reconciler.dependent.DependentResource; +import io.javaoperatorsdk.operator.processing.dependent.workflow.Condition; +import org.apache.hive.kubernetes.operator.model.HiveCluster; + +/** + * Activation condition for Tez AM dependent resources. + * Returns true only when spec.tezAm.enabled is true. + */ +public class TezAmEnabledCondition + implements Condition { + + @Override + public boolean isMet( + DependentResource dependentResource, + HiveCluster primary, + Context context) { + return primary.getSpec().tezAm().isEnabled(); + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/HiveCluster.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/HiveCluster.java new file mode 100644 index 000000000000..6a708e7c8c91 --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/HiveCluster.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.model; + +import io.fabric8.kubernetes.api.model.Namespaced; +import io.fabric8.kubernetes.client.CustomResource; +import io.fabric8.kubernetes.model.annotation.Group; +import io.fabric8.kubernetes.model.annotation.Kind; +import io.fabric8.kubernetes.model.annotation.ShortNames; +import io.fabric8.kubernetes.model.annotation.Version; + +/** + * HiveCluster is the root CRD type representing a complete Apache Hive deployment + * on Kubernetes. It manages Metastore, HiveServer2, LLAP daemons, and Tez AM. + */ +@Group("hive.apache.org") +@Version("v1alpha1") +@Kind("HiveCluster") +@ShortNames("hc") +public class HiveCluster + extends CustomResource + implements Namespaced { +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/HiveClusterSpec.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/HiveClusterSpec.java new file mode 100644 index 000000000000..40dd8a771203 --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/HiveClusterSpec.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.model; + +import java.util.List; +import java.util.Objects; + +import com.fasterxml.jackson.annotation.JsonPropertyDescription; +import io.fabric8.crd.generator.annotation.PreserveUnknownFields; +import io.fabric8.crd.generator.annotation.SchemaFrom; +import io.fabric8.generator.annotation.Required; +import io.fabric8.kubernetes.api.model.EnvVar; +import io.fabric8.kubernetes.api.model.Volume; +import io.fabric8.kubernetes.api.model.VolumeMount; +import org.apache.hive.kubernetes.operator.model.spec.HadoopSpec; +import org.apache.hive.kubernetes.operator.model.spec.HiveServer2Spec; +import org.apache.hive.kubernetes.operator.model.spec.LlapSpec; +import org.apache.hive.kubernetes.operator.model.spec.MetastoreSpec; +import org.apache.hive.kubernetes.operator.model.spec.TezAmSpec; +import org.apache.hive.kubernetes.operator.model.spec.ZookeeperSpec; + +/** Full specification for a HiveCluster custom resource. */ +public record HiveClusterSpec( + @JsonPropertyDescription("Docker image to use for all Hive components") + String image, + @JsonPropertyDescription("Image pull policy: Always, Never, or IfNotPresent") + String imagePullPolicy, + @JsonPropertyDescription("Metastore component configuration") + MetastoreSpec metastore, + @JsonPropertyDescription("HiveServer2 component configuration") + HiveServer2Spec hiveServer2, + @JsonPropertyDescription("LLAP daemon configuration. Enabled by default.") + LlapSpec llap, + @JsonPropertyDescription("Tez Application Master configuration. 
Enabled by default.") + TezAmSpec tezAm, + @Required + @JsonPropertyDescription( + "External ZooKeeper connection details (not managed by this operator)") + ZookeeperSpec zookeeper, + @JsonPropertyDescription("Hadoop/core-site.xml configuration overrides") + HadoopSpec hadoop, + @JsonPropertyDescription( + "Environment variables injected into all component pods " + + "(e.g., storage credentials, custom JVM options)") + @SchemaFrom(type = Object[].class) @PreserveUnknownFields + List envVars, + @JsonPropertyDescription( + "External JARs (URLs) downloaded into all component pods and added to " + + "HADOOP_CLASSPATH (e.g., GCS connector, ABFS connector)") + List externalJars, + @JsonPropertyDescription( + "Volumes added to all component pods " + + "(e.g., Secrets containing keytabs or service account keys)") + @SchemaFrom(type = Object[].class) @PreserveUnknownFields + List volumes, + @JsonPropertyDescription( + "Volume mounts added to all component containers " + + "(e.g., mounting a GCS key file at /etc/gcs/key.json)") + @SchemaFrom(type = Object[].class) @PreserveUnknownFields + List volumeMounts) { + + public HiveClusterSpec { + Objects.requireNonNull(zookeeper, + "zookeeper must be provided in the HiveCluster spec"); + envVars = envVars != null ? envVars : List.of(); + externalJars = externalJars != null ? externalJars : List.of(); + volumes = volumes != null ? volumes : List.of(); + volumeMounts = volumeMounts != null ? volumeMounts : List.of(); + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/HiveClusterStatus.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/HiveClusterStatus.java new file mode 100644 index 000000000000..d2432dda2246 --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/HiveClusterStatus.java @@ -0,0 +1,106 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.model; + +import java.util.ArrayList; +import java.util.List; + +import io.fabric8.kubernetes.api.model.Condition; +import org.apache.hive.kubernetes.operator.model.status.ComponentStatus; + +/** Status subresource for the HiveCluster custom resource. 
*/ +public class HiveClusterStatus { + + private List conditions = new ArrayList<>(); + private ComponentStatus metastore; + private ComponentStatus hiveServer2; + private ComponentStatus llap; + private ComponentStatus tezAm; + private Long observedGeneration; + + public List getConditions() { + return conditions; + } + + public void setConditions(List conditions) { + this.conditions = conditions; + } + + public ComponentStatus getMetastore() { + return metastore; + } + + public void setMetastore(ComponentStatus metastore) { + this.metastore = metastore; + } + + public ComponentStatus getHiveServer2() { + return hiveServer2; + } + + public void setHiveServer2(ComponentStatus hiveServer2) { + this.hiveServer2 = hiveServer2; + } + + public ComponentStatus getLlap() { + return llap; + } + + public void setLlap(ComponentStatus llap) { + this.llap = llap; + } + + public ComponentStatus getTezAm() { + return tezAm; + } + + public void setTezAm(ComponentStatus tezAm) { + this.tezAm = tezAm; + } + + public Long getObservedGeneration() { + return observedGeneration; + } + + public void setObservedGeneration(Long observedGeneration) { + this.observedGeneration = observedGeneration; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + HiveClusterStatus that = (HiveClusterStatus) o; + return java.util.Objects.equals(observedGeneration, that.observedGeneration) && + java.util.Objects.equals(conditions, that.conditions) && + java.util.Objects.equals(metastore, that.metastore) && + java.util.Objects.equals(hiveServer2, that.hiveServer2) && + java.util.Objects.equals(llap, that.llap) && + java.util.Objects.equals(tezAm, that.tezAm); + } + + @Override + public int hashCode() { + return java.util.Objects.hash(conditions, metastore, hiveServer2, llap, tezAm, observedGeneration); + } +} diff --git 
a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/DatabaseConfig.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/DatabaseConfig.java new file mode 100644 index 000000000000..a93b4684bd02 --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/DatabaseConfig.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.model.spec; + +import com.fasterxml.jackson.annotation.JsonPropertyDescription; +import io.fabric8.generator.annotation.Default; + +/** JDBC database connection configuration for the Hive Metastore backend. 
*/ +public record DatabaseConfig( + @JsonPropertyDescription("Database type: derby, mysql, postgres, mssql, or oracle") + @Default("derby") + String type, + @JsonPropertyDescription("JDBC connection URL") + String url, + @JsonPropertyDescription("JDBC driver class name") + String driver, + @JsonPropertyDescription("Database username") + String username, + @JsonPropertyDescription("Reference to a Kubernetes Secret containing the database password") + SecretKeyRef passwordSecretRef, + @JsonPropertyDescription( + "URL to download the JDBC driver jar, e.g. " + + "https://repo1.maven.org/maven2/org/postgresql/" + + "postgresql/42.7.5/postgresql-42.7.5.jar") + String driverJarUrl) { + + public DatabaseConfig { + type = type != null ? type : "derby"; + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/HadoopSpec.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/HadoopSpec.java new file mode 100644 index 000000000000..420dc66d4c54 --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/HadoopSpec.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hive.kubernetes.operator.model.spec; + +import java.util.Map; + +import com.fasterxml.jackson.annotation.JsonPropertyDescription; + +/** Hadoop configuration overrides, primarily for core-site.xml (filesystem settings). */ +public record HadoopSpec( + @JsonPropertyDescription("Key-value pairs written into core-site.xml") + Map coreSiteOverrides) { +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/HiveServer2Spec.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/HiveServer2Spec.java new file mode 100644 index 000000000000..578b7e91f67d --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/HiveServer2Spec.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hive.kubernetes.operator.model.spec; + +import java.util.List; +import java.util.Map; + +import com.fasterxml.jackson.annotation.JsonPropertyDescription; +import io.fabric8.crd.generator.annotation.PreserveUnknownFields; +import io.fabric8.crd.generator.annotation.SchemaFrom; +import io.fabric8.generator.annotation.Default; +import io.fabric8.kubernetes.api.model.Volume; +import io.fabric8.kubernetes.api.model.VolumeMount; + +/** Configuration for the HiveServer2 component. */ +public record HiveServer2Spec( + @JsonPropertyDescription("Number of replicas") + @Default("1") + Integer replicas, + @JsonPropertyDescription("Resource requirements for pods") + ResourceRequirementsSpec resources, + @JsonPropertyDescription("Additional configuration overrides as key-value pairs") + Map configOverrides, + @JsonPropertyDescription("Additional volumes to attach to the pod (e.g., for keytabs or truststores)") + @SchemaFrom(type = Object[].class) @PreserveUnknownFields + List extraVolumes, + @JsonPropertyDescription("Additional volume mounts for the container") + @SchemaFrom(type = Object[].class) @PreserveUnknownFields + List extraVolumeMounts, + @JsonPropertyDescription("Kubernetes Service type: ClusterIP, LoadBalancer, or NodePort") + @Default("ClusterIP") + String serviceType, + @JsonPropertyDescription("HiveServer2 Thrift port") + @Default("10000") + Integer thriftPort, + @JsonPropertyDescription("HiveServer2 Web UI port") + @Default("10002") + Integer webUiPort, + @JsonPropertyDescription("List of URIs to external JARs to download and add to HS2 classpath ") + List externalJars, + @JsonPropertyDescription("Readiness probe configuration") + ProbeSpec readinessProbe, + @JsonPropertyDescription("Liveness probe configuration") + ProbeSpec livenessProbe) { + + public HiveServer2Spec { + replicas = replicas != null ? replicas : 1; + serviceType = serviceType != null ? serviceType : "ClusterIP"; + thriftPort = thriftPort != null ? 
thriftPort : 10000; + webUiPort = webUiPort != null ? webUiPort : 10002; + extraVolumes = extraVolumes != null ? extraVolumes : List.of(); + extraVolumeMounts = extraVolumeMounts != null ? extraVolumeMounts : List.of(); + externalJars = externalJars != null ? externalJars : List.of(); + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/LlapSpec.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/LlapSpec.java new file mode 100644 index 000000000000..17ff5967ff9a --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/LlapSpec.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hive.kubernetes.operator.model.spec; + +import java.util.List; +import java.util.Map; + +import com.fasterxml.jackson.annotation.JsonPropertyDescription; +import io.fabric8.crd.generator.annotation.PreserveUnknownFields; +import io.fabric8.crd.generator.annotation.SchemaFrom; +import io.fabric8.generator.annotation.Default; +import io.fabric8.kubernetes.api.model.Volume; +import io.fabric8.kubernetes.api.model.VolumeMount; + +/** Configuration for LLAP (Live Long and Process) daemons. */ +public record LlapSpec( + @JsonPropertyDescription("Number of replicas") + @Default("1") + Integer replicas, + @JsonPropertyDescription("Resource requirements for pods") + ResourceRequirementsSpec resources, + @JsonPropertyDescription("Additional configuration overrides as key-value pairs") + Map configOverrides, + @JsonPropertyDescription("Additional volumes to attach to the pod (e.g., for keytabs or truststores)") + @SchemaFrom(type = Object[].class) @PreserveUnknownFields + List extraVolumes, + @JsonPropertyDescription("Additional volume mounts for the container") + @SchemaFrom(type = Object[].class) @PreserveUnknownFields + List extraVolumeMounts, + @JsonPropertyDescription("Whether LLAP is enabled") + @Default("true") + Boolean enabled, + @JsonPropertyDescription("Number of LLAP executors per daemon") + @Default("1") + Integer executors, + @JsonPropertyDescription("Memory in MB per LLAP daemon instance") + @Default("1024") + Integer memoryMb, + @JsonPropertyDescription("LLAP service hosts identifier for ZooKeeper registration") + String serviceHosts, + @JsonPropertyDescription("Readiness probe configuration") + ProbeSpec readinessProbe) { + + public LlapSpec { + replicas = replicas != null ? replicas : 1; + enabled = enabled != null ? enabled : true; + executors = executors != null ? executors : 1; + memoryMb = memoryMb != null ? memoryMb : 1024; + serviceHosts = serviceHosts != null ? serviceHosts : "@llap0"; + extraVolumes = extraVolumes != null ? 
extraVolumes : List.of(); + extraVolumeMounts = extraVolumeMounts != null ? extraVolumeMounts : List.of(); + } + + public boolean isEnabled() { + return enabled; + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/MetastoreSpec.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/MetastoreSpec.java new file mode 100644 index 000000000000..307c17221ee7 --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/MetastoreSpec.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.model.spec; + +import java.util.List; +import java.util.Map; + +import com.fasterxml.jackson.annotation.JsonPropertyDescription; +import io.fabric8.crd.generator.annotation.PreserveUnknownFields; +import io.fabric8.crd.generator.annotation.SchemaFrom; +import io.fabric8.generator.annotation.Default; +import io.fabric8.kubernetes.api.model.Volume; +import io.fabric8.kubernetes.api.model.VolumeMount; + +/** Configuration for the Hive Metastore component. 
+ *
+ * <p>The compact constructor fills in omitted values: {@code replicas} becomes 1,
+ * {@code database} becomes a {@link DatabaseConfig} built from {@code "derby"} and five
+ * {@code null} arguments, {@code warehouseDir} becomes {@code /hive/warehouse},
+ * {@code enabled} becomes {@code true}, and both volume lists become empty lists.
+ * Every other component is stored as given and may be {@code null}.
+ */
+public record MetastoreSpec(
+    @JsonPropertyDescription("Number of replicas")
+    @Default("1")
+    Integer replicas,
+    @JsonPropertyDescription("Resource requirements for pods")
+    ResourceRequirementsSpec resources,
+    @JsonPropertyDescription("Additional configuration overrides as key-value pairs")
+    Map<String, String> configOverrides,
+    @JsonPropertyDescription("Additional volumes to attach to the pod (e.g., for keytabs or truststores)")
+    @SchemaFrom(type = Object[].class) @PreserveUnknownFields
+    List<Volume> extraVolumes,
+    @JsonPropertyDescription("Additional volume mounts for the container")
+    @SchemaFrom(type = Object[].class) @PreserveUnknownFields
+    List<VolumeMount> extraVolumeMounts,
+    @JsonPropertyDescription("Database connection configuration for the metastore backend")
+    DatabaseConfig database,
+    @JsonPropertyDescription("Warehouse directory path")
+    @Default("/hive/warehouse")
+    String warehouseDir,
+    @JsonPropertyDescription("Whether the operator should deploy and manage a Metastore")
+    @Default("true")
+    Boolean enabled,
+    @JsonPropertyDescription("Thrift URI of the external Metastore (if enabled is false)")
+    String externalUri,
+    @JsonPropertyDescription("Readiness probe configuration")
+    ProbeSpec readinessProbe,
+    @JsonPropertyDescription("Liveness probe configuration")
+    ProbeSpec livenessProbe) {
+
+  /** Replaces {@code null} components with their defaults so accessors never return null for them. */
+  public MetastoreSpec {
+    replicas = replicas != null ? replicas : 1;
+    database = database != null ? database : new DatabaseConfig(
+        "derby", null, null, null, null, null);
+    warehouseDir = warehouseDir != null ? warehouseDir : "/hive/warehouse";
+    enabled = enabled != null ? enabled : true;
+    extraVolumes = extraVolumes != null ? extraVolumes : List.of();
+    extraVolumeMounts = extraVolumeMounts != null ? extraVolumeMounts : List.of();
+  }
+
+  /**
+   * Primitive view of {@link #enabled()}.
+   *
+   * @return the {@code enabled} flag, which the constructor guarantees to be non-null
+   */
+  public boolean isEnabled() {
+    return enabled;
+  }
+}
diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/ProbeSpec.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/ProbeSpec.java
new file mode 100644
index 000000000000..7afab6a0277b
--- /dev/null
+++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/ProbeSpec.java
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.kubernetes.operator.model.spec;
+
+import com.fasterxml.jackson.annotation.JsonPropertyDescription;
+
+/** Kubernetes probe (liveness/readiness) timing configurations.
+ *
+ * <p>All five components are nullable {@code Integer}s; this record declares no
+ * defaults and performs no validation of its own.
+ *
+ * @param initialDelaySeconds seconds after the container has started before probes are initiated
+ * @param periodSeconds how often, in seconds, the probe is performed
+ * @param timeoutSeconds seconds after which the probe times out
+ * @param failureThreshold minimum consecutive failures for the probe to be considered failed
+ *     after having succeeded
+ * @param successThreshold minimum consecutive successes for the probe to be considered
+ *     successful after having failed
+ */
+public record ProbeSpec(
+    @JsonPropertyDescription("Number of seconds after the container has started before probes are initiated.")
+    Integer initialDelaySeconds,
+    @JsonPropertyDescription("How often (in seconds) to perform the probe.")
+    Integer periodSeconds,
+    @JsonPropertyDescription("Number of seconds after which the probe times out.")
+    Integer timeoutSeconds,
+    @JsonPropertyDescription("Minimum consecutive failures for the probe to be considered failed after having succeeded.")
+    Integer failureThreshold,
+    @JsonPropertyDescription("Minimum consecutive successes for the probe to be considered successful after having failed.")
+    Integer successThreshold) {
+}
diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/ResourceRequirementsSpec.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/ResourceRequirementsSpec.java
new file mode 100644
index 000000000000..b7b10934bc77
--- /dev/null
+++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/ResourceRequirementsSpec.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.kubernetes.operator.model.spec;
+
+import com.fasterxml.jackson.annotation.JsonPropertyDescription;
+import io.fabric8.generator.annotation.Default;
+
+/**
+ * CPU and memory requests and limits for a component's pods.
+ *
+ * <p>Requests fall back to {@code 500m} CPU and {@code 1Gi} memory when omitted;
+ * limits are stored as given and may be {@code null}.
+ */
+public record ResourceRequirementsSpec(
+    @JsonPropertyDescription("CPU request (e.g. 500m, 1)")
+    @Default("500m")
+    String requestsCpu,
+    @JsonPropertyDescription("Memory request (e.g. 1Gi, 512Mi)")
+    @Default("1Gi")
+    String requestsMemory,
+    @JsonPropertyDescription("CPU limit (e.g. 2, 1000m)")
+    String limitsCpu,
+    @JsonPropertyDescription("Memory limit (e.g. 2Gi, 1024Mi)")
+    String limitsMemory) {
+
+  /** Applies the request defaults; the two limit components are left untouched. */
+  public ResourceRequirementsSpec {
+    if (requestsCpu == null) {
+      requestsCpu = "500m";
+    }
+    if (requestsMemory == null) {
+      requestsMemory = "1Gi";
+    }
+  }
+}
diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/SecretKeyRef.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/SecretKeyRef.java
new file mode 100644
index 000000000000..3084552bd1e1
--- /dev/null
+++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/SecretKeyRef.java
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.kubernetes.operator.model.spec;
+
+import com.fasterxml.jackson.annotation.JsonPropertyDescription;
+
+/**
+ * Reference to a key within a Kubernetes Secret.
+ *
+ * <p>Plain data holder: both components are stored as given, with no defaults
+ * or null checks applied by this record.
+ *
+ * @param name name of the Kubernetes Secret
+ * @param key key within the Secret
+ */
+public record SecretKeyRef(
+    @JsonPropertyDescription("Name of the Kubernetes Secret")
+    String name,
+    @JsonPropertyDescription("Key within the Secret")
+    String key) {
+}
diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/TezAmSpec.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/TezAmSpec.java
new file mode 100644
index 000000000000..a0494c2c5e73
--- /dev/null
+++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/TezAmSpec.java
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.kubernetes.operator.model.spec;
+
+import java.util.List;
+import java.util.Map;
+
+import com.fasterxml.jackson.annotation.JsonPropertyDescription;
+import io.fabric8.crd.generator.annotation.PreserveUnknownFields;
+import io.fabric8.crd.generator.annotation.SchemaFrom;
+import io.fabric8.generator.annotation.Default;
+import io.fabric8.kubernetes.api.model.Volume;
+import io.fabric8.kubernetes.api.model.VolumeMount;
+
+/**
+ * Configuration for the Tez Application Master component.
+ *
+ * <p>The compact constructor fills in omitted values: {@code replicas} becomes 1,
+ * {@code enabled} becomes {@code true}, {@code scratchStorageSize} becomes {@code 1Gi},
+ * and both volume lists become empty lists. Every other component is stored as given
+ * and may be {@code null}.
+ */
+public record TezAmSpec(
+    @JsonPropertyDescription("Number of replicas")
+    @Default("1")
+    Integer replicas,
+    @JsonPropertyDescription("Resource requirements for pods")
+    ResourceRequirementsSpec resources,
+    @JsonPropertyDescription("Additional configuration overrides as key-value pairs")
+    Map<String, String> configOverrides,
+    @JsonPropertyDescription("Additional volumes to attach to the pod (e.g., for keytabs or truststores)")
+    @SchemaFrom(type = Object[].class) @PreserveUnknownFields
+    List<Volume> extraVolumes,
+    @JsonPropertyDescription("Additional volume mounts for the container")
+    @SchemaFrom(type = Object[].class) @PreserveUnknownFields
+    List<VolumeMount> extraVolumeMounts,
+    @JsonPropertyDescription("Whether Tez AM is enabled")
+    @Default("true")
+    Boolean enabled,
+    @JsonPropertyDescription("Storage size for the shared scratch PVC "
+        + "(ReadWriteMany) mounted on HS2 and TezAM at /opt/hive/scratch")
+    @Default("1Gi")
+    String scratchStorageSize,
+    @JsonPropertyDescription("StorageClass for the shared scratch PVC. "
+        + "Must support ReadWriteMany access. If null, uses cluster default.")
+    String scratchStorageClassName) {
+
+  /** Replaces {@code null} components with their defaults so accessors never return null for them. */
+  public TezAmSpec {
+    replicas = replicas != null ? replicas : 1;
+    enabled = enabled != null ? enabled : true;
+    scratchStorageSize = scratchStorageSize != null ? scratchStorageSize : "1Gi";
+    extraVolumes = extraVolumes != null ? extraVolumes : List.of();
+    extraVolumeMounts = extraVolumeMounts != null ? extraVolumeMounts : List.of();
+  }
+
+  /**
+   * Primitive view of {@link #enabled()}.
+   *
+   * @return the {@code enabled} flag, which the constructor guarantees to be non-null
+   */
+  public boolean isEnabled() {
+    return enabled;
+  }
+}
diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/ZookeeperSpec.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/ZookeeperSpec.java
new file mode 100644
index 000000000000..a33908ae38df
--- /dev/null
+++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/ZookeeperSpec.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.kubernetes.operator.model.spec;
+
+import java.util.Objects;
+
+import com.fasterxml.jackson.annotation.JsonPropertyDescription;
+import io.fabric8.generator.annotation.Required;
+
+/**
+ * External ZooKeeper connection configuration. ZooKeeper is not managed by this operator.
+ *
+ * <p>Construction fails with a {@link NullPointerException} when {@code quorum} is null.
+ */
+public record ZookeeperSpec(
+    @Required
+    @JsonPropertyDescription("ZooKeeper quorum connection string. This field is strictly required.")
+    String quorum) {
+
+  /** Rejects a missing quorum at construction time instead of at first use. */
+  public ZookeeperSpec {
+    Objects.requireNonNull(quorum,
+        "ZooKeeper quorum must be explicitly defined in the HiveCluster spec.");
+  }
+}
diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/status/ComponentStatus.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/status/ComponentStatus.java
new file mode 100644
index 000000000000..155c46f3a714
--- /dev/null
+++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/status/ComponentStatus.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.kubernetes.operator.model.status;
+
+/**
+ * Status of an individual Hive component (Metastore, HS2, LLAP, TezAM).
+ *
+ * <p>Mutable bean holding a ready-replica count, a desired-replica count and a
+ * phase string. Two instances are equal when all three values match.
+ */
+public class ComponentStatus {
+
+  private int readyReplicas;
+  private int desiredReplicas;
+  private String phase;
+
+  /** @return the recorded number of ready replicas */
+  public int getReadyReplicas() {
+    return this.readyReplicas;
+  }
+
+  /** @param readyReplicas the number of ready replicas to record */
+  public void setReadyReplicas(int readyReplicas) {
+    this.readyReplicas = readyReplicas;
+  }
+
+  /** @return the recorded number of desired replicas */
+  public int getDesiredReplicas() {
+    return this.desiredReplicas;
+  }
+
+  /** @param desiredReplicas the number of desired replicas to record */
+  public void setDesiredReplicas(int desiredReplicas) {
+    this.desiredReplicas = desiredReplicas;
+  }
+
+  /** @return the recorded phase, or {@code null} if none has been set */
+  public String getPhase() {
+    return this.phase;
+  }
+
+  /** @param phase the phase to record; may be {@code null} */
+  public void setPhase(String phase) {
+    this.phase = phase;
+  }
+
+  /** Compares exact runtime class first, then the two counters, then the phase. */
+  @Override
+  public boolean equals(Object o) {
+    if (o == this) {
+      return true;
+    }
+    if (o == null || o.getClass() != getClass()) {
+      return false;
+    }
+    ComponentStatus other = (ComponentStatus) o;
+    if (readyReplicas != other.readyReplicas || desiredReplicas != other.desiredReplicas) {
+      return false;
+    }
+    return java.util.Objects.equals(phase, other.phase);
+  }
+
+  /** Hashes the same three values that {@link #equals(Object)} compares. */
+  @Override
+  public int hashCode() {
+    return java.util.Objects.hash(readyReplicas, desiredReplicas, phase);
+  }
+}
diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/reconciler/HiveClusterReconciler.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/reconciler/HiveClusterReconciler.java
new file mode 100644
index 000000000000..c9823640a5e7
--- /dev/null
+++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/reconciler/HiveClusterReconciler.java
@@ -0,0 +1,335 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.kubernetes.operator.reconciler;
+
+import java.time.Instant;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+import java.util.function.Function;
+
+import io.fabric8.kubernetes.api.model.Condition;
+import io.fabric8.kubernetes.api.model.HasMetadata;
+import io.fabric8.kubernetes.api.model.apps.Deployment;
+import io.fabric8.kubernetes.api.model.apps.StatefulSet;
+import io.fabric8.kubernetes.api.model.batch.v1.Job;
+import io.javaoperatorsdk.operator.api.reconciler.Context;
+import io.javaoperatorsdk.operator.api.reconciler.ControllerConfiguration;
+import io.javaoperatorsdk.operator.api.reconciler.ErrorStatusUpdateControl;
+import io.javaoperatorsdk.operator.api.reconciler.Reconciler;
+import io.javaoperatorsdk.operator.api.reconciler.UpdateControl;
+import io.javaoperatorsdk.operator.api.reconciler.Workflow;
+import io.javaoperatorsdk.operator.api.reconciler.dependent.Dependent;
+import org.apache.hive.kubernetes.operator.dependent.HadoopConfigMapDependent;
+import org.apache.hive.kubernetes.operator.dependent.HiveServer2ConfigMapDependent;
+import org.apache.hive.kubernetes.operator.dependent.HiveServer2DeploymentDependent;
+import org.apache.hive.kubernetes.operator.dependent.HiveServer2ServiceDependent;
+import org.apache.hive.kubernetes.operator.dependent.LlapConfigMapDependent;
+import org.apache.hive.kubernetes.operator.dependent.LlapServiceDependent;
+import org.apache.hive.kubernetes.operator.dependent.LlapStatefulSetDependent;
+import org.apache.hive.kubernetes.operator.dependent.MetastoreConfigMapDependent;
+import org.apache.hive.kubernetes.operator.dependent.MetastoreDeploymentDependent;
+import org.apache.hive.kubernetes.operator.dependent.MetastoreServiceDependent;
+import org.apache.hive.kubernetes.operator.dependent.SchemaInitJobDependent;
+import org.apache.hive.kubernetes.operator.dependent.ScratchPvcDependent;
+import org.apache.hive.kubernetes.operator.dependent.TezAmServiceDependent;
+import org.apache.hive.kubernetes.operator.dependent.TezAmStatefulSetDependent;
+import org.apache.hive.kubernetes.operator.dependent.condition.HiveServer2Precondition;
+import org.apache.hive.kubernetes.operator.dependent.condition.LlapEnabledCondition;
+import org.apache.hive.kubernetes.operator.dependent.condition.MetastoreEnabledCondition;
+import org.apache.hive.kubernetes.operator.dependent.condition.MetastoreReadyCondition;
+import org.apache.hive.kubernetes.operator.dependent.condition.SchemaJobCompletedCondition;
+import org.apache.hive.kubernetes.operator.dependent.condition.TezAmEnabledCondition;
+import org.apache.hive.kubernetes.operator.model.HiveCluster;
+import org.apache.hive.kubernetes.operator.model.HiveClusterStatus;
+import org.apache.hive.kubernetes.operator.model.status.ComponentStatus;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Main reconciler for the HiveCluster custom resource.
+ * Orchestrates all dependent resources with proper dependency ordering.
+ *
+ * <p>The dependents and their ordering are declared in the {@code @Workflow}
+ * annotation below; the methods of this class only compute and publish the
+ * {@link HiveClusterStatus}.
+ */
+@ControllerConfiguration
+@Workflow(dependents = {
+    // --- ConfigMap dependents ---
+    @Dependent(
+        name = "hadoop-configmap",
+        type = HadoopConfigMapDependent.class
+    ),
+    @Dependent(
+        name = "metastore-configmap",
+        type = MetastoreConfigMapDependent.class,
+        activationCondition = MetastoreEnabledCondition.class
+    ),
+    @Dependent(
+        name = "hiveserver2-configmap",
+        type = HiveServer2ConfigMapDependent.class
+    ),
+    // --- Job dependents ---
+    @Dependent(
+        name = "schema-init-job",
+        type = SchemaInitJobDependent.class,
+        dependsOn = {"metastore-configmap", "hadoop-configmap"},
+        readyPostcondition = SchemaJobCompletedCondition.class,
+        activationCondition = MetastoreEnabledCondition.class
+    ),
+    // --- Deployment dependents ---
+    @Dependent(
+        name = "metastore-deployment",
+        type = MetastoreDeploymentDependent.class,
+        dependsOn = {"schema-init-job"},
+        readyPostcondition = MetastoreReadyCondition.class,
+        activationCondition = MetastoreEnabledCondition.class
+    ),
+    // --- Service dependents ---
+    @Dependent(
+        name = "metastore-service",
+        type = MetastoreServiceDependent.class,
+        dependsOn = {"metastore-configmap"},
+        activationCondition = MetastoreEnabledCondition.class
+    ),
+    @Dependent(
+        name = "hiveserver2-deployment",
+        type = HiveServer2DeploymentDependent.class,
+        dependsOn = {"hiveserver2-configmap",
+            "hadoop-configmap"},
+        reconcilePrecondition = HiveServer2Precondition.class
+    ),
+    @Dependent(
+        name = "hiveserver2-service",
+        type = HiveServer2ServiceDependent.class,
+        dependsOn = {"hiveserver2-configmap"}
+    ),
+    // --- LLAP (conditional) ---
+    @Dependent(
+        name = "llap-configmap",
+        type = LlapConfigMapDependent.class,
+        activationCondition = LlapEnabledCondition.class
+    ),
+    @Dependent(
+        name = "llap-statefulset",
+        type = LlapStatefulSetDependent.class,
+        dependsOn = {"llap-configmap", "hadoop-configmap"},
+        activationCondition = LlapEnabledCondition.class
+    ),
+    @Dependent(
+        name = "llap-service",
+        type = LlapServiceDependent.class,
+        activationCondition = LlapEnabledCondition.class
+    ),
+    // --- TezAM (conditional) ---
+    @Dependent(
+        name = "scratch-pvc",
+        type = ScratchPvcDependent.class,
+        activationCondition = TezAmEnabledCondition.class
+    ),
+    @Dependent(
+        name = "tezam-service",
+        type = TezAmServiceDependent.class,
+        activationCondition = TezAmEnabledCondition.class
+    ),
+    @Dependent(
+        name = "tezam-statefulset",
+        type = TezAmStatefulSetDependent.class,
+        dependsOn = {"hiveserver2-configmap", "hadoop-configmap",
+            "tezam-service", "scratch-pvc"},
+        activationCondition = TezAmEnabledCondition.class
+    )
+})
+public class HiveClusterReconciler
+    implements Reconciler<HiveCluster> {
+
+  private static final Logger LOG =
+      LoggerFactory.getLogger(HiveClusterReconciler.class);
+
+  /**
+   * Recomputes the status of the given resource and patches it only when it changed.
+   *
+   * @param resource the HiveCluster being reconciled
+   * @param context reconciliation context used to look up secondary resources
+   * @return {@code noUpdate()} when the freshly built status equals the existing one,
+   *     otherwise a status patch carrying the new status
+   */
+  @Override
+  public UpdateControl<HiveCluster> reconcile(HiveCluster resource,
+      Context<HiveCluster> context) {
+    LOG.debug("Reconciling HiveCluster: {}/{}",
+        resource.getMetadata().getNamespace(),
+        resource.getMetadata().getName());
+
+    HiveClusterStatus existingStatus = resource.getStatus();
+    HiveClusterStatus newStatus = buildStatus(resource, context, existingStatus);
+
+    if (Objects.equals(existingStatus, newStatus)) {
+      return UpdateControl.noUpdate();
+    }
+
+    resource.setStatus(newStatus);
+    return UpdateControl.patchStatus(resource);
+  }
+
+  /**
+   * Records a failed reconciliation as {@code Ready=False / ReconciliationError}.
+   *
+   * <p>Only the {@code Ready} condition is replaced; every other existing condition is
+   * kept, so that the next successful reconcile can still preserve their
+   * {@code lastTransitionTime}.
+   *
+   * @param resource the HiveCluster whose reconciliation failed
+   * @param context reconciliation context (unused here)
+   * @param e the failure; its message becomes the condition message
+   * @return a status patch for the updated resource
+   */
+  @Override
+  public ErrorStatusUpdateControl<HiveCluster> updateErrorStatus(
+      HiveCluster resource, Context<HiveCluster> context, Exception e) {
+    LOG.error("Error reconciling HiveCluster: {}/{}",
+        resource.getMetadata().getNamespace(),
+        resource.getMetadata().getName(), e);
+
+    HiveClusterStatus status = resource.getStatus() != null
+        ? resource.getStatus() : new HiveClusterStatus();
+
+    List<Condition> existingConditions = status.getConditions() != null
+        ? status.getConditions() : Collections.emptyList();
+
+    // Carry over everything except the Ready condition, which is rebuilt below.
+    List<Condition> conditions = new ArrayList<>();
+    for (Condition existing : existingConditions) {
+      if (!"Ready".equals(existing.getType())) {
+        conditions.add(existing);
+      }
+    }
+    // Exceptions may have no message; fall back to toString() so the condition is never blank.
+    String message = e.getMessage() != null ? e.getMessage() : e.toString();
+    conditions.add(buildCondition("Ready", "False", "ReconciliationError",
+        message, existingConditions));
+
+    status.setConditions(conditions);
+    status.setObservedGeneration(resource.getMetadata().getGeneration());
+    resource.setStatus(status);
+
+    return ErrorStatusUpdateControl.patchStatus(resource);
+  }
+
+  /**
+   * Builds a fresh status from the spec and the secondary resources found in the context.
+   *
+   * <p>NOTE(review): LLAP and TezAM statuses are reported per component but are not
+   * folded into the overall {@code Ready} condition -- confirm this is intended.
+   *
+   * @param resource the HiveCluster being reconciled
+   * @param context reconciliation context used to look up the Job, Deployments and StatefulSets
+   * @param existingStatus the current status, or {@code null}; used only to preserve
+   *     {@code lastTransitionTime} values
+   * @return a new status object; the existing one is not modified
+   */
+  private HiveClusterStatus buildStatus(HiveCluster resource,
+      Context<HiveCluster> context, HiveClusterStatus existingStatus) {
+
+    HiveClusterStatus status = new HiveClusterStatus();
+    status.setObservedGeneration(resource.getMetadata().getGeneration());
+
+    List<Condition> existingConditions = existingStatus != null && existingStatus.getConditions() != null
+        ? existingStatus.getConditions() : Collections.emptyList();
+    List<Condition> conditions = new ArrayList<>();
+
+    // Schema Init status: with an external metastore there is no Job, so it counts as done.
+    boolean schemaReady;
+    if (resource.getSpec().metastore().isEnabled()) {
+      schemaReady = context.getSecondaryResource(Job.class)
+          .map(j -> j.getStatus() != null && j.getStatus().getSucceeded() != null && j.getStatus().getSucceeded() >= 1)
+          .orElse(false);
+    } else {
+      schemaReady = true;
+    }
+
+    conditions.add(buildCondition("SchemaInitialized", schemaReady ? "True" : "False",
+        schemaReady ? "JobCompleted" : "JobPending",
+        schemaReady ? "Schema initialized successfully" : "Schema initialization pending",
+        existingConditions));
+
+    // Metastore status
+    boolean metastoreReady;
+    if (resource.getSpec().metastore().isEnabled()) {
+      ComponentStatus metastoreStatus =
+          buildComponentStatus(context, Deployment.class, resource.getMetadata().getName() + "-metastore",
+              resource.getSpec().metastore().replicas(),
+              d -> d.getStatus() != null && d.getStatus().getReadyReplicas() != null
+                  ? d.getStatus().getReadyReplicas() : 0);
+      status.setMetastore(metastoreStatus);
+
+      metastoreReady = metastoreStatus.getReadyReplicas() >= metastoreStatus.getDesiredReplicas()
+          && metastoreStatus.getDesiredReplicas() > 0;
+
+      conditions.add(buildCondition("MetastoreReady", metastoreReady ? "True" : "False",
+          metastoreReady ? "DeploymentReady" : "DeploymentNotReady",
+          metastoreReady ? "Metastore is ready" : "Metastore not yet ready", existingConditions));
+    } else {
+      metastoreReady = true;
+      conditions.add(buildCondition("MetastoreReady", "True", "ExternalMetastore", "Using external Hive Metastore",
+          existingConditions));
+    }
+
+    // HiveServer2 status
+    ComponentStatus hs2Status = buildComponentStatus(context, Deployment.class,
+        resource.getMetadata().getName() + "-hiveserver2",
+        resource.getSpec().hiveServer2().replicas(),
+        d -> d.getStatus() != null && d.getStatus().getReadyReplicas() != null ? d.getStatus().getReadyReplicas() : 0);
+    status.setHiveServer2(hs2Status);
+
+    boolean hs2Ready =
+        hs2Status.getReadyReplicas() >= hs2Status.getDesiredReplicas() && hs2Status.getDesiredReplicas() > 0;
+    conditions.add(buildCondition("HiveServer2Ready", hs2Ready ? "True" : "False",
+        hs2Ready ? "DeploymentReady" : "DeploymentNotReady",
+        hs2Ready ? "HiveServer2 is ready" : "HiveServer2 not yet ready",
+        existingConditions));
+
+    // LLAP status (optional)
+    if (resource.getSpec().llap().isEnabled()) {
+      status.setLlap(buildComponentStatus(context, StatefulSet.class,
+          resource.getMetadata().getName() + "-llap",
+          resource.getSpec().llap().replicas(),
+          s -> s.getStatus() != null && s.getStatus().getReadyReplicas() != null
+              ? s.getStatus().getReadyReplicas() : 0));
+    }
+
+    // TezAM status (optional)
+    if (resource.getSpec().tezAm().isEnabled()) {
+      status.setTezAm(buildComponentStatus(context, StatefulSet.class, resource.getMetadata().getName() + "-tezam",
+          resource.getSpec().tezAm().replicas(),
+          s -> s.getStatus() != null
+              && s.getStatus().getReadyReplicas() != null ? s.getStatus().getReadyReplicas() : 0));
+    }
+
+    // Overall Ready condition
+    boolean allReady = schemaReady && metastoreReady && hs2Ready;
+    conditions.add(buildCondition("Ready", allReady ? "True" : "False",
+        allReady ? "AllComponentsReady" : "ComponentsNotReady",
+        allReady ? "All Hive components are ready" : "One or more components are not ready",
+        existingConditions));
+
+    status.setConditions(conditions);
+    return status;
+  }
+
+  /**
+   * Unified helper to build status for Deployments, StatefulSets, or any HasMetadata type
+   * that tracks replicas. Filters by Kubernetes resource name from the informer cache.
+   *
+   * @param context reconciliation context to query for secondary resources
+   * @param resourceClass type of secondary resource to look up
+   * @param expectedResourceName exact {@code metadata.name} of the resource to report on
+   * @param desiredReplicas replica count requested in the spec
+   * @param readyExtractor reads the ready-replica count from the matched resource
+   * @return a status whose phase is {@code Running} when ready replicas reach a positive
+   *     desired count and {@code Pending} otherwise; ready is 0 when no resource matches
+   */
+  private <R extends HasMetadata> ComponentStatus buildComponentStatus(
+      Context<HiveCluster> context, Class<R> resourceClass, String expectedResourceName,
+      int desiredReplicas, Function<R, Integer> readyExtractor) {
+
+    ComponentStatus cs = new ComponentStatus();
+    cs.setDesiredReplicas(desiredReplicas);
+
+    int ready = context.getSecondaryResources(resourceClass).stream()
+        .filter(r -> r.getMetadata().getName().equals(expectedResourceName))
+        .findFirst()
+        .map(readyExtractor)
+        .orElse(0);
+
+    cs.setReadyReplicas(ready);
+    cs.setPhase(ready >= desiredReplicas && desiredReplicas > 0 ? "Running" : "Pending");
+    return cs;
+  }
+
+  /**
+   * Creates a condition, reusing the previous {@code lastTransitionTime} when a condition
+   * of the same type and status already exists.
+   *
+   * @param type condition type, e.g. {@code Ready}
+   * @param conditionStatus condition status string, e.g. {@code True} or {@code False}
+   * @param reason machine-readable reason
+   * @param message human-readable message
+   * @param existingConditions conditions from the current status; never {@code null}
+   * @return the new condition, stamped with the preserved time or with the current instant
+   */
+  private Condition buildCondition(String type, String conditionStatus,
+      String reason, String message, List<Condition> existingConditions) {
+
+    Condition condition = new Condition();
+    condition.setType(type);
+    condition.setStatus(conditionStatus);
+    condition.setReason(reason);
+    condition.setMessage(message);
+
+    // Preserve lastTransitionTime when the condition status has not changed
+    String preservedTime = existingConditions.stream()
+        .filter(c -> type.equals(c.getType()) && conditionStatus.equals(c.getStatus()))
+        .map(Condition::getLastTransitionTime)
+        .findFirst()
+        .orElse(null);
+
+    condition.setLastTransitionTime(preservedTime != null ? preservedTime : Instant.now().toString());
+    return condition;
+  }
+}
diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/util/HadoopXmlBuilder.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/util/HadoopXmlBuilder.java
new file mode 100644
index 000000000000..a7735beea976
--- /dev/null
+++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/util/HadoopXmlBuilder.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.kubernetes.operator.util;
+
+import java.util.Map;
+import java.util.TreeMap;
+
+/**
+ * Builds Hadoop-style XML configuration file content from a property map.
+ * The output format matches standard Hadoop configuration files as used by
+ * Hive, HDFS, and Tez.
+ */
+public final class HadoopXmlBuilder {
+
+  private HadoopXmlBuilder() {
+  }
+
+  /**
+   * Renders a property map as a Hadoop-style XML configuration string.
+   *
+   * <p>Properties are emitted in ascending key order. A {@code null} map yields an
+   * empty {@code <configuration>} element; a {@code null} key or value is written as
+   * empty text.
+   *
+   * @param properties key-value pairs to include in the configuration
+   * @return XML string in Hadoop configuration format
+   */
+  public static String buildXml(Map<String, String> properties) {
+    StringBuilder sb = new StringBuilder();
+    // NOTE(review): the two prolog lines follow the layout of stock Hadoop *-site.xml
+    // files -- confirm the exact declaration against the upstream source.
+    sb.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
+    sb.append("<?xml-stylesheet type=\"text/xsl\" href=\"configuration.xsl\"?>\n");
+    sb.append("<configuration>\n");
+    if (properties != null) {
+      // Sort by key for deterministic XML output regardless of Map implementation
+      for (Map.Entry<String, String> entry : new TreeMap<>(properties).entrySet()) {
+        sb.append("  <property>\n");
+        sb.append("    <name>").append(escapeXml(entry.getKey()))
+            .append("</name>\n");
+        sb.append("    <value>").append(escapeXml(entry.getValue()))
+            .append("</value>\n");
+        sb.append("  </property>\n");
+      }
+    }
+    sb.append("</configuration>\n");
+    return sb.toString();
+  }
+
+  /**
+   * Replaces the five XML special characters with their predefined entities.
+   *
+   * @param value text to escape; may be {@code null}
+   * @return the escaped text, or an empty string for {@code null}
+   */
+  private static String escapeXml(String value) {
+    if (value == null) {
+      return "";
+    }
+    // '&' must be handled first so the entities produced below are not escaped again.
+    return value
+        .replace("&", "&amp;")
+        .replace("<", "&lt;")
+        .replace(">", "&gt;")
+        .replace("\"", "&quot;")
+        .replace("'", "&apos;");
+  }
+}
diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/util/HiveConfigBuilder.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/util/HiveConfigBuilder.java
new file mode 100644
index 000000000000..2e506febf132
--- /dev/null
+++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/util/HiveConfigBuilder.java
@@ -0,0 +1,179 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.
+ * The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.kubernetes.operator.util;
+
+import java.util.LinkedHashMap;
+import java.util.Map;
+
+import org.apache.hive.kubernetes.operator.model.HiveCluster;
+import org.apache.hive.kubernetes.operator.model.HiveClusterSpec;
+import org.apache.hive.kubernetes.operator.model.spec.DatabaseConfig;
+import org.apache.hive.kubernetes.operator.model.spec.HadoopSpec;
+import org.apache.hive.kubernetes.operator.model.spec.LlapSpec;
+import org.apache.hive.kubernetes.operator.model.spec.MetastoreSpec;
+
+/**
+ * Single source of truth for all Hive component configuration properties.
+ * Both ConfigMap dependents and Deployment/StatefulSet dependents call these
+ * methods, ensuring the config hash always matches the actual ConfigMap content.
+ */
+public final class HiveConfigBuilder {
+
+  private HiveConfigBuilder() {
+  }
+
+  /**
+   * Builds hive-site.xml properties for HiveServer2 and TezAM.
+   *
+   * <p>The metastore URI points at the {@code <cluster-name>-metastore} service on port
+   * 9083 when the metastore is enabled, and at {@code externalUri} otherwise; it is
+   * omitted when that value is null or empty. Entries from
+   * {@code hiveServer2.configOverrides} are applied last and therefore win.
+   *
+   * @param hiveCluster the custom resource; only its metadata name is read
+   * @param spec the cluster spec supplying the tezAm, zookeeper, metastore, llap and
+   *     hiveServer2 settings
+   * @return a new mutable map in insertion order
+   */
+  public static Map<String, String> getHiveServer2HiveSite(
+      HiveCluster hiveCluster, HiveClusterSpec spec) {
+    Map<String, String> props = new LinkedHashMap<>();
+    boolean tezAmEnabled = spec.tezAm().isEnabled();
+    String zkQuorum = spec.zookeeper().quorum();
+
+    String metastoreUri = spec.metastore().isEnabled()
+        ? "thrift://" + hiveCluster.getMetadata().getName() + "-metastore:9083"
+        : spec.metastore().externalUri();
+    if (metastoreUri != null && !metastoreUri.isEmpty()) {
+      props.put("hive.metastore.uris", metastoreUri);
+    }
+    props.put("hive.metastore.warehouse.dir", spec.metastore().warehouseDir());
+    props.put("hive.server2.enable.doAs", "false");
+    props.put("hive.tez.exec.inplace.progress", "false");
+    props.put("hive.tez.exec.print.summary", "true");
+    props.put("hive.jar.directory", "/tmp");
+    props.put("hive.user.install.directory", "/tmp");
+
+    if (tezAmEnabled) {
+      // External Tez AM: sessions are discovered through ZooKeeper.
+      props.put("hive.exec.local.scratchdir", "/opt/hive/scratch");
+      props.put("hive.server2.tez.use.external.sessions", "true");
+      props.put("hive.server2.tez.external.sessions.namespace",
+          "/tez-external-sessions/tez_am/server");
+      props.put("hive.server2.tez.external.sessions.registry.class",
+          "org.apache.hadoop.hive.ql.exec.tez."
+              + "ZookeeperExternalSessionsRegistryClient");
+      props.put("hive.zookeeper.quorum", zkQuorum);
+      props.put("tez.am.framework.mode", "STANDALONE_ZOOKEEPER");
+      props.put("tez.am.registry.namespace", "/tez_am/server");
+      props.put("tez.am.zookeeper.quorum", zkQuorum);
+      // LLAP settings are only written when the external Tez AM is enabled as well.
+      LlapSpec llap = spec.llap();
+      if (llap.isEnabled()) {
+        props.put("hive.execution.mode", "llap");
+        props.put("hive.llap.execution.mode", "all");
+        props.put("hive.llap.daemon.service.hosts", llap.serviceHosts());
+      }
+    } else {
+      // No Tez AM: run Tez and MapReduce in local mode.
+      props.put("hive.server2.tez.use.external.sessions", "false");
+      props.put("tez.local.mode", "true");
+      props.put("tez.am.framework.mode", "LOCAL");
+      props.put("mapreduce.framework.name", "local");
+    }
+
+    if (spec.hiveServer2().configOverrides() != null) {
+      props.putAll(spec.hiveServer2().configOverrides());
+    }
+    return props;
+  }
+
+  /** Builds tez-site.xml properties for HiveServer2 and TezAM.
*/ + public static Map getTezSite(HiveClusterSpec spec) { + boolean tezAmEnabled = spec.tezAm().isEnabled(); + String zkQuorum = spec.zookeeper().quorum(); + + Map tezProps = new LinkedHashMap<>(); + tezProps.put("tez.am.mode.session", "true"); + tezProps.put("tez.ignore.lib.uris", "true"); + tezProps.put("tez.am.tez-ui.webservice.enable", "false"); + tezProps.put("tez.am.disable.client-version-check", "true"); + tezProps.put("tez.session.am.dag.submit.timeout.secs", "-1"); + tezProps.put("tez.am.zookeeper.quorum", zkQuorum); + tezProps.put("hive.zookeeper.quorum", zkQuorum); + if (tezAmEnabled) { + tezProps.put("tez.local.mode", "false"); + tezProps.put("tez.am.framework.mode", "STANDALONE_ZOOKEEPER"); + tezProps.put("tez.am.registry.namespace", "/tez_am/server"); + } else { + tezProps.put("tez.local.mode", "true"); + } + + LlapSpec llap = spec.llap(); + if (llap.isEnabled()) { + tezProps.put("hive.llap.daemon.service.hosts", llap.serviceHosts()); + } + + if (spec.tezAm().configOverrides() != null) { + tezProps.putAll(spec.tezAm().configOverrides()); + } + return tezProps; + } + + /** Builds core-site.xml properties from hadoop.coreSiteOverrides. */ + public static Map getHadoopCoreSite(HiveClusterSpec spec) { + Map props = new LinkedHashMap<>(); + HadoopSpec hadoop = spec.hadoop(); + if (hadoop != null && hadoop.coreSiteOverrides() != null) { + props.putAll(hadoop.coreSiteOverrides()); + } + return props; + } + + /** Builds metastore-site.xml properties. 
*/ + public static Map getMetastoreSite(HiveClusterSpec spec) { + MetastoreSpec metastore = spec.metastore(); + Map props = new LinkedHashMap<>(); + + props.put("metastore.warehouse.dir", metastore.warehouseDir()); + + DatabaseConfig db = metastore.database(); + if (db != null) { + if (db.url() != null) { + props.put("javax.jdo.option.ConnectionURL", db.url()); + } + if (db.driver() != null) { + props.put("javax.jdo.option.ConnectionDriverName", db.driver()); + } + if (db.username() != null) { + props.put("javax.jdo.option.ConnectionUserName", db.username()); + } + } + + if (metastore.configOverrides() != null) { + props.putAll(metastore.configOverrides()); + } + return props; + } + + /** Builds llap-daemon-site.xml properties. */ + public static Map getLlapDaemonSite(HiveClusterSpec spec) { + LlapSpec llap = spec.llap(); + Map props = new LinkedHashMap<>(); + + props.put("hive.llap.daemon.memory.per.instance.mb", + String.valueOf(llap.memoryMb())); + props.put("hive.llap.daemon.num.executors", + String.valueOf(llap.executors())); + props.put("hive.llap.daemon.service.hosts", llap.serviceHosts()); + props.put("hive.zookeeper.quorum", spec.zookeeper().quorum()); + + if (llap.configOverrides() != null) { + props.putAll(llap.configOverrides()); + } + return props; + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/util/Labels.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/util/Labels.java new file mode 100644 index 000000000000..dcf0cc43b3c6 --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/util/Labels.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.util; + +import java.util.LinkedHashMap; +import java.util.Map; + +import org.apache.hive.kubernetes.operator.model.HiveCluster; + +/** Standard Kubernetes label and selector helpers following recommended label conventions. */ +public final class Labels { + + public static final String APP_NAME = "app.kubernetes.io/name"; + public static final String APP_INSTANCE = "app.kubernetes.io/instance"; + public static final String APP_COMPONENT = "app.kubernetes.io/component"; + public static final String MANAGED_BY = "app.kubernetes.io/managed-by"; + public static final String MANAGED_BY_VALUE = "hive-kubernetes-operator"; + + private Labels() { + } + + /** + * Returns the full set of labels for a component's Kubernetes resource. + * + * @param hc the HiveCluster resource + * @param component component name (metastore, hiveserver2, llap, tezam, schema-init) + * @return label map + */ + public static Map forComponent(HiveCluster hc, + String component) { + Map labels = new LinkedHashMap<>(); + labels.put(APP_NAME, "apache-hive"); + labels.put(APP_INSTANCE, hc.getMetadata().getName()); + labels.put(APP_COMPONENT, component); + labels.put(MANAGED_BY, MANAGED_BY_VALUE); + return labels; + } + + /** + * Returns the minimal selector labels for matching pods of a component. 
+ * + * @param hc the HiveCluster resource + * @param component component name + * @return selector map + */ + public static Map selectorForComponent(HiveCluster hc, + String component) { + Map selector = new LinkedHashMap<>(); + selector.put(APP_INSTANCE, hc.getMetadata().getName()); + selector.put(APP_COMPONENT, component); + return selector; + } +} diff --git a/packaging/src/kubernetes/src/resources/log4j2.xml b/packaging/src/kubernetes/src/resources/log4j2.xml new file mode 100644 index 000000000000..f906eb0fdf29 --- /dev/null +++ b/packaging/src/kubernetes/src/resources/log4j2.xml @@ -0,0 +1,29 @@ + + + + + + + + + + + + + + + + + diff --git a/pom.xml b/pom.xml index 05507d09a4fe..4481607165db 100644 --- a/pom.xml +++ b/pom.xml @@ -99,6 +99,8 @@ 3.1.0 2.16.0 3.6.0 + 5.3.4 + 7.7.0 3.5.3 2.7.10 2.3.0