참고링크: https://www.youtube.com/watch?v=pp_1_Ds1980
# Dockerfile
# Custom Airflow image with a JDK and PySpark so DAGs can submit Spark jobs.
FROM apache/airflow:2.7.1-python3.11

USER root

# gcc/python3-dev: build Python C extensions; openjdk-11-jdk: JVM required by (Py)Spark.
# Install and clean the apt lists in the same layer so the cache never bloats the image.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        gcc \
        openjdk-11-jdk \
        python3-dev && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Set JAVA_HOME environment variable.
# TARGETARCH is injected automatically by BuildKit (amd64/arm64), so the JDK path
# resolves on either architecture; the default keeps the original arm64 behavior
# when building without BuildKit.
ARG TARGETARCH=arm64
ENV JAVA_HOME=/usr/lib/jvm/java-11-openjdk-${TARGETARCH}

USER airflow

# Pin apache-airflow to the base image's version so pip cannot silently
# up/downgrade Airflow while resolving the Spark provider's dependencies.
RUN pip install --no-cache-dir \
        apache-airflow==2.7.1 \
        apache-airflow-providers-apache-spark \
        pyspark
# docker-compose.yml
# Local Airflow + standalone Spark cluster. Indentation reconstructed — the
# pasted snippet had lost all YAML nesting and was not parseable as written.
version: '3'

# Shared settings for the Spark master/worker containers.
x-spark-common: &spark-common
  image: bitnami/spark:latest  # NOTE(review): pin a specific tag for reproducible environments
  volumes:
    - ./jobs:/opt/bitnami/spark/jobs
  networks:
    - code-with-yu

# Shared settings for the Airflow webserver/scheduler containers.
x-airflow-common: &airflow-common
  build:
    context: .
    dockerfile: Dockerfile
  env_file:
    - airflow.env
  volumes:
    - ./jobs:/opt/airflow/jobs
    - ./dags:/opt/airflow/dags
    - ./logs:/opt/airflow/logs
  depends_on:
    - postgres
  networks:
    - code-with-yu

services:
  spark-master:
    <<: *spark-common
    command: bin/spark-class org.apache.spark.deploy.master.Master
    ports:
      - "9090:8080"  # Spark master web UI (host 9090 avoids clashing with Airflow on 8080)
      - "7077:7077"  # Spark master RPC port that workers/drivers connect to

  spark-worker:
    <<: *spark-common
    command: bin/spark-class org.apache.spark.deploy.worker.Worker spark://spark-master:7077
    depends_on:
      - spark-master
    environment:
      SPARK_MODE: worker
      SPARK_WORKER_CORES: 2
      SPARK_WORKER_MEMORY: 1g
      SPARK_MASTER_URL: spark://spark-master:7077

  postgres:
    # Airflow metadata database.
    # NOTE(review): pin a major version (e.g. postgres:15) and mount a named
    # volume if the metadata DB should survive `docker compose down`.
    image: postgres
    environment:
      - POSTGRES_USER=airflow
      - POSTGRES_PASSWORD=airflow
      - POSTGRES_DB=airflow
    networks:
      - code-with-yu

  webserver:
    <<: *airflow-common
    command: webserver
    ports:
      - "8080:8080"
    depends_on:
      - scheduler

  scheduler:
    <<: *airflow-common
    # `airflow db init` is deprecated as of Airflow 2.7; `airflow db migrate`
    # both creates and upgrades the metadata DB, so it alone is sufficient
    # before bootstrapping the admin user and starting the scheduler.
    command: bash -c "airflow db migrate && airflow users create --username admin --firstname Yusuf --lastname Ganiyu --role Admin --email airscholar@gmail.com --password admin && airflow scheduler"

networks:
  code-with-yu:
Airflow에서 Spark Connection 설정하기
결과
'프로그래밍 > airflow' 카테고리의 다른 글
[Airflow] Azure Databricks Spark submit (0) | 2024.06.05 |
---|---|
[airflow]Kubernetes dags github 연동 (1) | 2023.11.24 |
[airflow] KubernetesExecutor 환경 구축하기 Local_mount.ver (2) | 2023.11.22 |
[Docker]Airflow과 mysql 연동하기 (4) | 2022.07.22 |