-
Notifications
You must be signed in to change notification settings - Fork 8
/
Vagrantfile
99 lines (83 loc) · 4.01 KB
/
Vagrantfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# -*- mode: ruby -*-
# vi: set ft=ruby :
# All Vagrant configuration is done below. The "2" in Vagrant.configure
# configures the configuration version (we support older styles for
# backwards compatibility). Please don't change it unless you know what
# you're doing.
Vagrant.configure("2") do |config|
# The most common configuration options are documented and commented below.
# For a complete reference, please see the online documentation at
# https://docs.vagrantup.com.
# The name of the Virtual Machine
VM_NAME = "debian-hadoop"
# Every Vagrant development environment requires a box. You can search for
# boxes at https://vagrantcloud.com/search.
config.vm.box = "debian/bullseye64"
config.vm.box_version = "11.20220912.1"
config.vm.define VM_NAME
# Disable automatic box update checking. If you disable this, then
# boxes will only be checked for updates when the user runs
# `vagrant box outdated`. This is not recommended.
# config.vm.box_check_update = false
# Create a forwarded port mapping which allows access to a specific port
# within the machine from a port on the host machine. In the example below,
# accessing "localhost:8080" will access port 80 on the guest machine.
# NOTE: This will enable public access to the opened port
# config.vm.network "forwarded_port", guest: 80, host: 8080
# Create a forwarded port mapping which allows access to a specific port
# within the machine from a port on the host machine and only allow access
# via 127.0.0.1 to disable public access
config.vm.network "forwarded_port", guest: 8888, host: 8888
config.vm.network "forwarded_port", guest: 4040, host: 4040
# Create a private network, which allows host-only access to the machine
# using a specific IP.
# config.vm.network "private_network", ip: "192.168.33.10"
# Create a public network, which generally matched to bridged network.
# Bridged networks make the machine appear as another physical device on
# your network.
# config.vm.network "public_network"
# Share an additional folder to the guest VM. The first argument is
# the path on the host to the actual folder. The second argument is
# the path on the guest to mount the folder. And the optional third
# argument is a set of non-required options.
# config.vm.synced_folder "../data", "/vagrant_data"
# Provider-specific configuration so you can fine-tune various
# backing providers for Vagrant. These expose provider-specific options.
# Example for VirtualBox:
#
config.vm.provider "virtualbox" do |vb|
vb.name = VM_NAME
# # Display the VirtualBox GUI when booting the machine
# vb.gui = true
# # Customize the amount of memory on the VM:
vb.memory = "4096"
end
config.vm.provider "docker" do |d, override|
override.vm.box = nil
override.vm.box_version = "1.0.0"
d.image = "tknerr/baseimage-ubuntu:20.04"
d.has_ssh = true
end
#
# View the documentation for the provider you are using for more
# information on available options.
# Enable provisioning with a shell script. Additional provisioners such as
# Ansible, Chef, Docker, Puppet and Salt are also available. Please see the
# documentation for more information about their specific syntax and use.
# config.vm.provision "shell", inline: <<-SHELL
# apt-get update
# apt-get install -y apache2
# SHELL
config.vm.provision "prerequisites", type: "shell", path: "scripts/01_prerequisites.sh"
config.vm.provision "install_hadoop", type: "shell", path: "scripts/02_install_hadoop.sh", env:
{
"HADOOP_VERSION" => "3.3.6",
"HADOOP_DOWNLOAD_URL" => "https://dlcdn.apache.org/hadoop/common/hadoop-3.3.6/hadoop-3.3.6.tar.gz"
}
config.vm.provision "install_spark", type: "shell", path: "scripts/03_install_spark.sh", env:
{
"SPARK_VERSION" => "3.5.0-bin-hadoop3",
"SPARK_DOWNLOAD_URL" => "https://archive.apache.org/dist/spark/spark-3.5.0/spark-3.5.0-bin-hadoop3.tgz"
}
config.vm.provision "install_jupyter", type: "shell", path: "scripts/06_install_jupyter.sh"
end