Adventures of a wannabe geek!

Ranting within

Building a Riak Cluster in AWS With Packer and Terraform

Following my pattern of building AMIs for applications, I create my Riak cluster using Packer to build the AMI and Terraform to provision the infrastructure.

Building Riak AMIs with Packer

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
{
  "variables": {
    "ami_id": "",
    "private_subnet_id": "",
    "security_group_id": "",
    "packer_build_number": ""
  },
  "description": "Riak Image",
  "builders": [
    {
      "ami_name": "riak-{{user `packer_build_number`}}",
      "availability_zone": "eu-west-1a",
      "iam_instance_profile": "app-server",
      "instance_type": "t2.small",
      "region": "eu-west-1",
      "run_tags": {
        "role": "packer"
      },
      "security_group_ids": [
        "{{user `security_group_id`}}"
      ],
      "source_ami": "{{user `ami_id`}}",
      "ssh_timeout": "10m",
      "ssh_username": "ubuntu",
      "subnet_id": "{{user `private_subnet_id`}}",
      "tags": {
        "Name": "riak-packer-image"
      },
      "type": "amazon-ebs"
    }
  ],
  "provisioners": [
    {
      "type": "shell",
      "inline": [ "sleep 10" ]
    },
    {
      "type": "shell",
      "script": "install_dependencies.sh",
      "execute_command": "echo '' | {{ .Vars }} sudo -E -S sh '{{ .Path }}'"
    },
    {
      "type": "ansible-local",
      "playbook_file": "riak.yml",
      "extra_arguments": [
        "--module-path=./modules"
      ],
      "playbook_dir": "../../"
    }
  ]
}

The install_dependencies.sh script is as described previously.

The ansible playbook for Riak looks as follows:

1
2
3
4
5
6
7
8
9
10
# Riak playbook: gathers EC2 tags/facts, then applies the base role
# followed by the riak role to every host in the inventory.
# `become` replaces the deprecated `sudo` keyword (deprecated in
# Ansible 1.9, removed in 2.9) with identical privilege-escalation
# behaviour.
- hosts: all
  become: yes

  pre_tasks:
    - ec2_tags:
    - ec2_facts:

  roles:
    - base
    - riak

The base playbook installs a base role for all the base pieces of my system (e.g. Logstash, Sensu-client, prometheus node_exporter) and then proceeds to install riak.

The riak ansible role looks as follows:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# Tasks for the riak role: trust the Basho apt repo, install a pinned
# Riak version, set ulimits, template the config, and (re)start riak.

# Trust the package-signing key so apt can verify the repository.
- action: apt_key url={{ riak_key_url }} state=present

- action: apt_repository repo='{{ riak_deb_repo }}' state=present update_cache=yes

# Pin to the exact version from the role variables.
- apt: name=riak={{ riak_version }} state=present update_cache=yes

- name: set ulimit
  copy: src=etc-default-riak dest=/etc/default/riak owner=root group=root mode=0644

- name: template riak configuration
  template: src=riak.j2 dest=/etc/riak/riak.conf owner=riak mode=0644
  register: restart_riak

# Make sure the service is running on first converge...
- name: start riak
  service: name=riak state=started

# ...and bounce it when the configuration actually changed. The original
# registered the template result but never used it, and its "restart"
# task used state=started, so config changes were never applied to an
# already-running node.
- name: restart riak on config change
  service: name=riak state=restarted
  when: restart_riak.changed

The role itself is very simple. The riak cluster settings are all held in the riak.j2 template file. Notice that the riak template has the following line in it:

1
riak_control = on

The variables for the riak playbook look as follows:

1
2
3
# Variables for the riak role (key, repo, and pinned package version).
riak_key_url: "https://packagecloud.io/gpg.key"
riak_deb_repo: "deb https://packagecloud.io/basho/riak/ubuntu/ trusty main"
riak_version: 2.1.1-1

Deploying Riak with Terraform

The infrastructure of the Riak cluster is pretty simple:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
# Internal ELB fronting the Riak HTTP API (8098) across the three
# private subnets. Traffic is passed through as raw TCP; liveness is
# checked over HTTP against Riak's /ping endpoint.
resource "aws_elb" "riak_v2_elb" {
  name = "riak-elb-v2"
  subnets = ["${aws_subnet.primary-private.id}","${aws_subnet.secondary-private.id}","${aws_subnet.tertiary-private.id}"]
  security_groups = ["${aws_security_group.riak_elb.id}"]
  cross_zone_load_balancing = true
  connection_draining = true
  internal = true  # not internet-facing; only reachable inside the VPC

  listener {
    instance_port      = 8098
    instance_protocol  = "tcp"
    lb_port            = 8098
    lb_protocol        = "tcp"
  }

  health_check {
    healthy_threshold   = 2
    unhealthy_threshold = 2
    interval            = 10
    target              = "HTTP:8098/ping"
    timeout             = 5
  }
}

# Security group attached to the Riak nodes themselves.
resource "aws_security_group" "riak" {
  name = "riak-sg"
  description = "Riak Security Group"
  vpc_id = "${aws_vpc.default.id}"

  # HTTP API from the ELB.
  ingress {
    from_port = 8098
    to_port   = 8098
    protocol  = "tcp"
    security_groups = ["${aws_security_group.riak_elb.id}"]
  }

  # NOTE(review): this rule opens 8098 to the entire internet and makes
  # the ELB-scoped rule above redundant. The nodes are in private
  # subnets, but consider narrowing this to the VPC CIDR — confirm
  # whether world-open access was intentional.
  ingress {
    from_port = 8098
    to_port   = 8098
    protocol  = "tcp"
    cidr_blocks = ["0.0.0.0/0"]
  }

  # Unrestricted outbound.
  egress {
    from_port = "0"
    to_port = "0"
    protocol = "-1"
    cidr_blocks = ["0.0.0.0/0"]
  }

  tags {
    Name = "Riak Node"
  }
}

# Self-referencing rule: allow all TCP between members of the riak SG —
# presumably for Riak's intra-cluster ports (handoff, protobuf, epmd);
# confirm the exact port requirements before tightening.
resource "aws_security_group_rule" "riak_all_tcp" {
    type = "ingress"
    from_port = 0
    to_port = 65535
    protocol = "tcp"
    security_group_id = "${aws_security_group.riak.id}"
    source_security_group_id = "${aws_security_group.riak.id}"
}

# Security group for the internal Riak ELB.
resource "aws_security_group" "riak_elb" {
  name = "riak-elb-sg"
  description = "Riak Elastic Load Balancer Security Group"
  vpc_id = "${aws_vpc.default.id}"

  # From app nodes ("node" SG is declared elsewhere in this config).
  ingress {
    from_port = 8098
    to_port   = 8098
    protocol  = "tcp"
    security_groups = ["${aws_security_group.node.id}"]
  }

  # NOTE(review): world-open 8098 makes the node-SG rule above
  # redundant; the ELB is internal so exposure is VPC-limited, but
  # confirm this was intentional.
  ingress {
    from_port = 8098
    to_port   = 8098
    protocol  = "tcp"
    cidr_blocks = ["0.0.0.0/0"]
  }

  # Unrestricted outbound.
  egress {
    from_port = "0"
    to_port = "0"
    protocol = "-1"
    cidr_blocks = ["0.0.0.0/0"]
  }

  tags {
    Name = "Riak Load Balancer"
  }
}

# ASG spanning the three private subnets. min_size = 0 means no nodes
# run until desired capacity is raised out-of-band. Note the ELB is NOT
# attached via a `load_balancers` argument; the elb_name/elb_region tags
# suggest instances register themselves with the ELB at boot — TODO
# confirm against the instance bootstrap scripts.
resource "aws_autoscaling_group" "riak_v2_autoscale_group" {
  name = "riak-v2-autoscale-group"
  availability_zones = ["${aws_subnet.primary-private.availability_zone}","${aws_subnet.secondary-private.availability_zone}","${aws_subnet.tertiary-private.availability_zone}"]
  vpc_zone_identifier = ["${aws_subnet.primary-private.id}","${aws_subnet.secondary-private.id}","${aws_subnet.tertiary-private.id}"]
  launch_configuration = "${aws_launch_configuration.riak_launch_config.id}"
  min_size = 0
  max_size = 100
  health_check_type = "EC2"

  tag {
    key = "Name"
    value = "riak"
    propagate_at_launch = true
  }

  tag {
    key = "role"
    value = "riak"
    propagate_at_launch = true
  }

  tag {
    key = "elb_name"
    value = "${aws_elb.riak_v2_elb.name}"
    propagate_at_launch = true
  }

  tag {
    key = "elb_region"
    value = "${var.aws_region}"
    propagate_at_launch = true
  }
}

# Launch configuration for Riak nodes, built from the Packer AMI.
# create_before_destroy lets a new LC replace this one without first
# breaking the ASG reference.
resource "aws_launch_configuration" "riak_launch_config" {
  image_id = "${var.riak_ami_id}"
  instance_type = "${var.riak_instance_type}"
  iam_instance_profile = "app-server"
  key_name = "${aws_key_pair.terraform.key_name}"
  security_groups = ["${aws_security_group.riak.id}","${aws_security_group.node.id}"]
  enable_monitoring = false

  lifecycle {
    create_before_destroy = true
  }

  root_block_device {
    # NOTE(review): "driak_volume_size" looks like a typo for
    # "riak_volume_size" — verify against the variables file; if the
    # variable is actually declared as "driak_volume_size" this works
    # but should be renamed for consistency.
    volume_size = "${var.driak_volume_size}"
  }
}