Cluster
Provides an Elastic MapReduce Cluster, a web service that makes it easy to process large amounts of data efficiently. See Amazon Elastic MapReduce Documentation for more information.
To configure Instance Groups for task nodes, see the aws.emr.InstanceGroup resource.
Support for Instance Fleets will be made available in an upcoming release.
Example bootable config
NOTE: This example shows the minimal configuration needed to boot an EMR Cluster. It is not meant to demonstrate best practices; use it at your own risk.
import * as pulumi from "@pulumi/pulumi";
import * as aws from "@pulumi/aws";
// VPC for the example cluster: a /16 network with DNS hostnames enabled.
const mainVpc = new aws.ec2.Vpc("mainVpc", {
    tags: { name: "emr_test" },
    enableDnsHostnames: true,
    cidrBlock: "168.31.0.0/16",
});
// A single /20 subnet carved out of mainVpc.
const mainSubnet = new aws.ec2.Subnet("mainSubnet", {
    tags: { name: "emr_test" },
    cidrBlock: "168.31.0.0/20",
    vpcId: mainVpc.id,
});
// Service role: its trust policy lets elasticmapreduce.amazonaws.com call sts:AssumeRole.
const iamEmrServiceRole = new aws.iam.Role(
    "iamEmrServiceRole",
    {
        assumeRolePolicy: `{
"Version": "2008-10-17",
"Statement": [
{
"Sid": "",
"Effect": "Allow",
"Principal": {
"Service": "elasticmapreduce.amazonaws.com"
},
"Action": "sts:AssumeRole"
}
]
}
`,
    },
);
// Instance role: its trust policy lets ec2.amazonaws.com call sts:AssumeRole.
const iamEmrProfileRole = new aws.iam.Role(
    "iamEmrProfileRole",
    {
        assumeRolePolicy: `{
"Version": "2008-10-17",
"Statement": [
{
"Sid": "",
"Effect": "Allow",
"Principal": {
"Service": "ec2.amazonaws.com"
},
"Action": "sts:AssumeRole"
}
]
}
`,
    },
);
// Instance profile that wraps the EC2 role for the cluster's instances.
// Uses the singular `role` argument; the `roles` list is deprecated.
const emrProfile = new aws.iam.InstanceProfile("emrProfile", {
    role: iamEmrProfileRole.name,
});
// Security group used for both the master and the core (slave) nodes. It is
// declared ahead of the cluster because the cluster's ec2Attributes read its id.
const allowAccess = new aws.ec2.SecurityGroup("allowAccess", {
    description: "Allow inbound traffic",
    vpcId: mainVpc.id,
    ingress: [{
        fromPort: 0,
        toPort: 0,
        protocol: "-1",
        // cidrBlocks expects a list, so wrap the VPC's single CIDR block.
        cidrBlocks: [mainVpc.cidrBlock],
    }],
    egress: [{
        fromPort: 0,
        toPort: 0,
        protocol: "-1",
        cidrBlocks: ["0.0.0.0/0"],
    }],
    tags: {
        name: "emr_test",
    },
}, {
    // dependsOn takes resource objects, not Terraform-style address strings.
    dependsOn: [mainSubnet],
});
// The EMR cluster itself: Spark on emr-4.6.0 with one master and one core node.
const cluster = new aws.emr.Cluster("cluster", {
    releaseLabel: "emr-4.6.0",
    applications: ["Spark"],
    ec2Attributes: {
        subnetId: mainSubnet.id,
        // Both managed security groups point at the group created above.
        emrManagedMasterSecurityGroup: allowAccess.id,
        emrManagedSlaveSecurityGroup: allowAccess.id,
        instanceProfile: emrProfile.arn,
    },
    masterInstanceType: "m5.xlarge",
    coreInstanceType: "m5.xlarge",
    coreInstanceCount: 1,
    tags: {
        role: "rolename",
        dns_zone: "env_zone",
        env: "env",
        name: "name-env",
    },
    bootstrapActions: [{
        path: "s3://elasticmapreduce/bootstrap-actions/run-if",
        name: "runif",
        args: [
            "instance.isMaster=true",
            "echo running on master node",
        ],
    }],
    configurationsJson: ` [
{
"Classification": "hadoop-env",
"Configurations": [
{
"Classification": "export",
"Properties": {
"JAVA_HOME": "/usr/lib/jvm/java-1.8.0"
}
}
],
"Properties": {}
},
{
"Classification": "spark-env",
"Configurations": [
{
"Classification": "export",
"Properties": {
"JAVA_HOME": "/usr/lib/jvm/java-1.8.0"
}
}
],
"Properties": {}
}
]
`,
    serviceRole: iamEmrServiceRole.arn,
});
// Internet gateway attached to mainVpc.
const gw = new aws.ec2.InternetGateway("gw", {
    vpcId: mainVpc.id,
});
// Route table for mainVpc with a default route (0.0.0.0/0) through the gateway.
const defaultRoute = {
    gatewayId: gw.id,
    cidrBlock: "0.0.0.0/0",
};
const routeTable = new aws.ec2.RouteTable("routeTable", {
    routes: [defaultRoute],
    vpcId: mainVpc.id,
});
// Registers routeTable as the main route table of mainVpc.
const mainRouteTableAssociation = new aws.ec2.MainRouteTableAssociation(
    "mainRouteTableAssociation",
    {
        routeTableId: routeTable.id,
        vpcId: mainVpc.id,
    },
);
// Inline policy attached to the EMR service role; it allows the EC2, IAM, S3,
// SimpleDB and SQS actions listed below on all resources.
const iamEmrServicePolicy = new aws.iam.RolePolicy(
    "iamEmrServicePolicy",
    {
        policy: `{
"Version": "2012-10-17",
"Statement": [{
"Effect": "Allow",
"Resource": "*",
"Action": [
"ec2:AuthorizeSecurityGroupEgress",
"ec2:AuthorizeSecurityGroupIngress",
"ec2:CancelSpotInstanceRequests",
"ec2:CreateNetworkInterface",
"ec2:CreateSecurityGroup",
"ec2:CreateTags",
"ec2:DeleteNetworkInterface",
"ec2:DeleteSecurityGroup",
"ec2:DeleteTags",
"ec2:DescribeAvailabilityZones",
"ec2:DescribeAccountAttributes",
"ec2:DescribeDhcpOptions",
"ec2:DescribeInstanceStatus",
"ec2:DescribeInstances",
"ec2:DescribeKeyPairs",
"ec2:DescribeNetworkAcls",
"ec2:DescribeNetworkInterfaces",
"ec2:DescribePrefixLists",
"ec2:DescribeRouteTables",
"ec2:DescribeSecurityGroups",
"ec2:DescribeSpotInstanceRequests",
"ec2:DescribeSpotPriceHistory",
"ec2:DescribeSubnets",
"ec2:DescribeVpcAttribute",
"ec2:DescribeVpcEndpoints",
"ec2:DescribeVpcEndpointServices",
"ec2:DescribeVpcs",
"ec2:DetachNetworkInterface",
"ec2:ModifyImageAttribute",
"ec2:ModifyInstanceAttribute",
"ec2:RequestSpotInstances",
"ec2:RevokeSecurityGroupEgress",
"ec2:RunInstances",
"ec2:TerminateInstances",
"ec2:DeleteVolume",
"ec2:DescribeVolumeStatus",
"ec2:DescribeVolumes",
"ec2:DetachVolume",
"iam:GetRole",
"iam:GetRolePolicy",
"iam:ListInstanceProfiles",
"iam:ListRolePolicies",
"iam:PassRole",
"s3:CreateBucket",
"s3:Get*",
"s3:List*",
"sdb:BatchPutAttributes",
"sdb:Select",
"sqs:CreateQueue",
"sqs:Delete*",
"sqs:GetQueue*",
"sqs:PurgeQueue",
"sqs:ReceiveMessage"
]
}]
}
`,
        role: iamEmrServiceRole.id,
    },
);
const iamEmrProfilePolicy = new aws.iam.RolePolicy("iamEmrProfilePolicy", {
role: iamEmrProfileRole.id,
policy: `{
"Version": "2012-10-17",
"Statement": [{
"Effect": "Allow",
"Resource": "*",
"Action": [
"cloudwatch:*",
"dynamodb:*",
"ec2:Describe*",
"elasticmapreduce:Describe*",
"elasticmapreduce:ListBootstrapActions",
"elasticmapreduce:ListClusters",
"elasticmapreduce:ListInstanceGroups",
"elasticmapreduce:ListInstances",
"elasticmapreduce:ListSteps",
"kinesis:CreateStream",
"kinesis:DeleteStream",
"kinesis:DescribeStream",
"kinesis:GetRecords",
"kinesis:GetShardIterator",
"kinesis:MergeShards",
"kinesis:PutRecord",
"kinesis:SplitShard",
"rds:Describe*",
"s3:*",
"sdb:*",
"sns:*",
"sqs:*"
]
}]
}
`,
});import pulumi
import pulumi_aws as aws
main_vpc = aws.ec2.Vpc("mainVpc",
cidr_block="168.31.0.0/16",
enable_dns_hostnames=True,
tags={
"name": "emr_test",
})
main_subnet = aws.ec2.Subnet("mainSubnet",
vpc_id=main_vpc.id,
cidr_block="168.31.0.0/20",
tags={
"name": "emr_test",
})
# IAM role for EMR Service
iam_emr_service_role = aws.iam.Role("iamEmrServiceRole", assume_role_policy="""{
"Version": "2008-10-17",
"Statement": [
{
"Sid": "",
"Effect": "Allow",
"Principal": {
"Service": "elasticmapreduce.amazonaws.com"
},
"Action": "sts:AssumeRole"
}
]
}
""")
# IAM Role for EC2 Instance Profile
iam_emr_profile_role = aws.iam.Role("iamEmrProfileRole", assume_role_policy="""{
"Version": "2008-10-17",
"Statement": [
{
"Sid": "",
"Effect": "Allow",
"Principal": {
"Service": "ec2.amazonaws.com"
},
"Action": "sts:AssumeRole"
}
]
}
""")
# Instance profile that wraps the EC2 role for the cluster's instances.
# Uses the singular `role` argument; the `roles` list is deprecated.
emr_profile = aws.iam.InstanceProfile("emrProfile", role=iam_emr_profile_role.name)
# Security group used for both the master and the core (slave) nodes. It is
# created ahead of the cluster because the cluster's ec2_attributes read its id.
allow_access = aws.ec2.SecurityGroup("allowAccess",
    description="Allow inbound traffic",
    vpc_id=main_vpc.id,
    ingress=[{
        "from_port": 0,
        "to_port": 0,
        "protocol": "-1",
        # cidr_blocks expects a list, so wrap the VPC's single CIDR block.
        "cidr_blocks": [main_vpc.cidr_block],
    }],
    egress=[{
        "from_port": 0,
        "to_port": 0,
        "protocol": "-1",
        "cidr_blocks": ["0.0.0.0/0"],
    }],
    tags={
        "name": "emr_test",
    },
    # depends_on takes resource objects, not Terraform-style address strings;
    # ResourceOptions is reached through the imported `pulumi` module.
    opts=pulumi.ResourceOptions(depends_on=[main_subnet]))
# The EMR cluster itself: Spark on emr-4.6.0 with one master and one core node.
cluster = aws.emr.Cluster("cluster",
    release_label="emr-4.6.0",
    applications=["Spark"],
    ec2_attributes={
        "subnet_id": main_subnet.id,
        # Both managed security groups point at the group created above.
        "emrManagedMasterSecurityGroup": allow_access.id,
        "emrManagedSlaveSecurityGroup": allow_access.id,
        "instanceProfile": emr_profile.arn,
    },
    master_instance_type="m5.xlarge",
    core_instance_type="m5.xlarge",
    core_instance_count=1,
    tags={
        "role": "rolename",
        "dns_zone": "env_zone",
        "env": "env",
        "name": "name-env",
    },
    bootstrap_actions=[{
        "path": "s3://elasticmapreduce/bootstrap-actions/run-if",
        "name": "runif",
        "args": [
            "instance.isMaster=true",
            "echo running on master node",
        ],
    }],
    configurations_json=""" [
{
"Classification": "hadoop-env",
"Configurations": [
{
"Classification": "export",
"Properties": {
"JAVA_HOME": "/usr/lib/jvm/java-1.8.0"
}
}
],
"Properties": {}
},
{
"Classification": "spark-env",
"Configurations": [
{
"Classification": "export",
"Properties": {
"JAVA_HOME": "/usr/lib/jvm/java-1.8.0"
}
}
],
"Properties": {}
}
]
""",
    service_role=iam_emr_service_role.arn)
gw = aws.ec2.InternetGateway("gw", vpc_id=main_vpc.id)
route_table = aws.ec2.RouteTable("routeTable",
vpc_id=main_vpc.id,
routes=[{
"cidr_block": "0.0.0.0/0",
"gateway_id": gw.id,
}])
main_route_table_association = aws.ec2.MainRouteTableAssociation("mainRouteTableAssociation",
vpc_id=main_vpc.id,
route_table_id=route_table.id)
###
iam_emr_service_policy = aws.iam.RolePolicy("iamEmrServicePolicy",
role=iam_emr_service_role.id,
policy="""{
"Version": "2012-10-17",
"Statement": [{
"Effect": "Allow",
"Resource": "*",
"Action": [
"ec2:AuthorizeSecurityGroupEgress",
"ec2:AuthorizeSecurityGroupIngress",
"ec2:CancelSpotInstanceRequests",
"ec2:CreateNetworkInterface",
"ec2:CreateSecurityGroup",
"ec2:CreateTags",
"ec2:DeleteNetworkInterface",
"ec2:DeleteSecurityGroup",
"ec2:DeleteTags",
"ec2:DescribeAvailabilityZones",
"ec2:DescribeAccountAttributes",
"ec2:DescribeDhcpOptions",
"ec2:DescribeInstanceStatus",
"ec2:DescribeInstances",
"ec2:DescribeKeyPairs",
"ec2:DescribeNetworkAcls",
"ec2:DescribeNetworkInterfaces",
"ec2:DescribePrefixLists",
"ec2:DescribeRouteTables",
"ec2:DescribeSecurityGroups",
"ec2:DescribeSpotInstanceRequests",
"ec2:DescribeSpotPriceHistory",
"ec2:DescribeSubnets",
"ec2:DescribeVpcAttribute",
"ec2:DescribeVpcEndpoints",
"ec2:DescribeVpcEndpointServices",
"ec2:DescribeVpcs",
"ec2:DetachNetworkInterface",
"ec2:ModifyImageAttribute",
"ec2:ModifyInstanceAttribute",
"ec2:RequestSpotInstances",
"ec2:RevokeSecurityGroupEgress",
"ec2:RunInstances",
"ec2:TerminateInstances",
"ec2:DeleteVolume",
"ec2:DescribeVolumeStatus",
"ec2:DescribeVolumes",
"ec2:DetachVolume",
"iam:GetRole",
"iam:GetRolePolicy",
"iam:ListInstanceProfiles",
"iam:ListRolePolicies",
"iam:PassRole",
"s3:CreateBucket",
"s3:Get*",
"s3:List*",
"sdb:BatchPutAttributes",
"sdb:Select",
"sqs:CreateQueue",
"sqs:Delete*",
"sqs:GetQueue*",
"sqs:PurgeQueue",
"sqs:ReceiveMessage"
]
}]
}
""")
iam_emr_profile_policy = aws.iam.RolePolicy("iamEmrProfilePolicy",
role=iam_emr_profile_role.id,
policy="""{
"Version": "2012-10-17",
"Statement": [{
"Effect": "Allow",
"Resource": "*",
"Action": [
"cloudwatch:*",
"dynamodb:*",
"ec2:Describe*",
"elasticmapreduce:Describe*",
"elasticmapreduce:ListBootstrapActions",
"elasticmapreduce:ListClusters",
"elasticmapreduce:ListInstanceGroups",
"elasticmapreduce:ListInstances",
"elasticmapreduce:ListSteps",
"kinesis:CreateStream",
"kinesis:DeleteStream",
"kinesis:DescribeStream",
"kinesis:GetRecords",
"kinesis:GetShardIterator",
"kinesis:MergeShards",
"kinesis:PutRecord",
"kinesis:SplitShard",
"rds:Describe*",
"s3:*",
"sdb:*",
"sns:*",
"sqs:*"
]
}]
}
""")using Pulumi;
using Aws = Pulumi.Aws;
class MyStack : Stack
{
public MyStack()
{
var mainVpc = new Aws.Ec2.Vpc("mainVpc", new Aws.Ec2.VpcArgs
{
CidrBlock = "168.31.0.0/16",
EnableDnsHostnames = true,
Tags =
{
{ "name", "emr_test" },
},
});
var mainSubnet = new Aws.Ec2.Subnet("mainSubnet", new Aws.Ec2.SubnetArgs
{
VpcId = mainVpc.Id,
CidrBlock = "168.31.0.0/20",
Tags =
{
{ "name", "emr_test" },
},
});
// IAM role for EMR Service
var iamEmrServiceRole = new Aws.Iam.Role("iamEmrServiceRole", new Aws.Iam.RoleArgs
{
AssumeRolePolicy = @"{
""Version"": ""2008-10-17"",
""Statement"": [
{
""Sid"": """",
""Effect"": ""Allow"",
""Principal"": {
""Service"": ""elasticmapreduce.amazonaws.com""
},
""Action"": ""sts:AssumeRole""
}
]
}
",
});
// IAM Role for EC2 Instance Profile
var iamEmrProfileRole = new Aws.Iam.Role("iamEmrProfileRole", new Aws.Iam.RoleArgs
{
AssumeRolePolicy = @"{
""Version"": ""2008-10-17"",
""Statement"": [
{
""Sid"": """",
""Effect"": ""Allow"",
""Principal"": {
""Service"": ""ec2.amazonaws.com""
},
""Action"": ""sts:AssumeRole""
}
]
}
",
});
// Instance profile that wraps the EC2 role for the cluster's instances.
// Uses the singular Role argument; the Roles list is deprecated.
var emrProfile = new Aws.Iam.InstanceProfile("emrProfile", new Aws.Iam.InstanceProfileArgs
{
    Role = iamEmrProfileRole.Name,
});
// Security group used for both the master and the core (slave) nodes. It is
// created ahead of the cluster because the cluster's Ec2Attributes read its Id.
var allowAccess = new Aws.Ec2.SecurityGroup("allowAccess", new Aws.Ec2.SecurityGroupArgs
{
    Description = "Allow inbound traffic",
    VpcId = mainVpc.Id,
    Ingress =
    {
        new Aws.Ec2.Inputs.SecurityGroupIngressArgs
        {
            FromPort = 0,
            ToPort = 0,
            Protocol = "-1",
            // CidrBlocks is a list, so add the VPC's single CIDR block as an element.
            CidrBlocks =
            {
                mainVpc.CidrBlock,
            },
        },
    },
    Egress =
    {
        new Aws.Ec2.Inputs.SecurityGroupEgressArgs
        {
            FromPort = 0,
            ToPort = 0,
            Protocol = "-1",
            CidrBlocks =
            {
                "0.0.0.0/0",
            },
        },
    },
    Tags =
    {
        { "name", "emr_test" },
    },
}, new CustomResourceOptions
{
    // DependsOn takes resource objects, not Terraform-style address strings.
    DependsOn =
    {
        mainSubnet,
    },
});
// The EMR cluster itself: Spark on emr-4.6.0 with one master and one core node.
var cluster = new Aws.Emr.Cluster("cluster", new Aws.Emr.ClusterArgs
{
    ReleaseLabel = "emr-4.6.0",
    Applications =
    {
        "Spark",
    },
    Ec2Attributes = new Aws.Emr.Inputs.ClusterEc2AttributesArgs
    {
        SubnetId = mainSubnet.Id,
        // Both managed security groups point at the group created above.
        EmrManagedMasterSecurityGroup = allowAccess.Id,
        EmrManagedSlaveSecurityGroup = allowAccess.Id,
        InstanceProfile = emrProfile.Arn,
    },
    MasterInstanceType = "m5.xlarge",
    CoreInstanceType = "m5.xlarge",
    CoreInstanceCount = 1,
    Tags =
    {
        { "role", "rolename" },
        { "dns_zone", "env_zone" },
        { "env", "env" },
        { "name", "name-env" },
    },
    BootstrapActions =
    {
        new Aws.Emr.Inputs.ClusterBootstrapActionArgs
        {
            Path = "s3://elasticmapreduce/bootstrap-actions/run-if",
            Name = "runif",
            Args =
            {
                "instance.isMaster=true",
                "echo running on master node",
            },
        },
    },
    ConfigurationsJson = @" [
{
""Classification"": ""hadoop-env"",
""Configurations"": [
{
""Classification"": ""export"",
""Properties"": {
""JAVA_HOME"": ""/usr/lib/jvm/java-1.8.0""
}
}
],
""Properties"": {}
},
{
""Classification"": ""spark-env"",
""Configurations"": [
{
""Classification"": ""export"",
""Properties"": {
""JAVA_HOME"": ""/usr/lib/jvm/java-1.8.0""
}
}
],
""Properties"": {}
}
]
",
    ServiceRole = iamEmrServiceRole.Arn,
});
var gw = new Aws.Ec2.InternetGateway("gw", new Aws.Ec2.InternetGatewayArgs
{
VpcId = mainVpc.Id,
});
var routeTable = new Aws.Ec2.RouteTable("routeTable", new Aws.Ec2.RouteTableArgs
{
VpcId = mainVpc.Id,
Routes =
{
new Aws.Ec2.Inputs.RouteTableRouteArgs
{
CidrBlock = "0.0.0.0/0",
GatewayId = gw.Id,
},
},
});
var mainRouteTableAssociation = new Aws.Ec2.MainRouteTableAssociation("mainRouteTableAssociation", new Aws.Ec2.MainRouteTableAssociationArgs
{
VpcId = mainVpc.Id,
RouteTableId = routeTable.Id,
});
//##
var iamEmrServicePolicy = new Aws.Iam.RolePolicy("iamEmrServicePolicy", new Aws.Iam.RolePolicyArgs
{
Role = iamEmrServiceRole.Id,
Policy = @"{
""Version"": ""2012-10-17"",
""Statement"": [{
""Effect"": ""Allow"",
""Resource"": ""*"",
""Action"": [
""ec2:AuthorizeSecurityGroupEgress"",
""ec2:AuthorizeSecurityGroupIngress"",
""ec2:CancelSpotInstanceRequests"",
""ec2:CreateNetworkInterface"",
""ec2:CreateSecurityGroup"",
""ec2:CreateTags"",
""ec2:DeleteNetworkInterface"",
""ec2:DeleteSecurityGroup"",
""ec2:DeleteTags"",
""ec2:DescribeAvailabilityZones"",
""ec2:DescribeAccountAttributes"",
""ec2:DescribeDhcpOptions"",
""ec2:DescribeInstanceStatus"",
""ec2:DescribeInstances"",
""ec2:DescribeKeyPairs"",
""ec2:DescribeNetworkAcls"",
""ec2:DescribeNetworkInterfaces"",
""ec2:DescribePrefixLists"",
""ec2:DescribeRouteTables"",
""ec2:DescribeSecurityGroups"",
""ec2:DescribeSpotInstanceRequests"",
""ec2:DescribeSpotPriceHistory"",
""ec2:DescribeSubnets"",
""ec2:DescribeVpcAttribute"",
""ec2:DescribeVpcEndpoints"",
""ec2:DescribeVpcEndpointServices"",
""ec2:DescribeVpcs"",
""ec2:DetachNetworkInterface"",
""ec2:ModifyImageAttribute"",
""ec2:ModifyInstanceAttribute"",
""ec2:RequestSpotInstances"",
""ec2:RevokeSecurityGroupEgress"",
""ec2:RunInstances"",
""ec2:TerminateInstances"",
""ec2:DeleteVolume"",
""ec2:DescribeVolumeStatus"",
""ec2:DescribeVolumes"",
""ec2:DetachVolume"",
""iam:GetRole"",
""iam:GetRolePolicy"",
""iam:ListInstanceProfiles"",
""iam:ListRolePolicies"",
""iam:PassRole"",
""s3:CreateBucket"",
""s3:Get*"",
""s3:List*"",
""sdb:BatchPutAttributes"",
""sdb:Select"",
""sqs:CreateQueue"",
""sqs:Delete*"",
""sqs:GetQueue*"",
""sqs:PurgeQueue"",
""sqs:ReceiveMessage""
]
}]
}
",
});
var iamEmrProfilePolicy = new Aws.Iam.RolePolicy("iamEmrProfilePolicy", new Aws.Iam.RolePolicyArgs
{
Role = iamEmrProfileRole.Id,
Policy = @"{
""Version"": ""2012-10-17"",
""Statement"": [{
""Effect"": ""Allow"",
""Resource"": ""*"",
""Action"": [
""cloudwatch:*"",
""dynamodb:*"",
""ec2:Describe*"",
""elasticmapreduce:Describe*"",
""elasticmapreduce:ListBootstrapActions"",
""elasticmapreduce:ListClusters"",
""elasticmapreduce:ListInstanceGroups"",
""elasticmapreduce:ListInstances"",
""elasticmapreduce:ListSteps"",
""kinesis:CreateStream"",
""kinesis:DeleteStream"",
""kinesis:DescribeStream"",
""kinesis:GetRecords"",
""kinesis:GetShardIterator"",
""kinesis:MergeShards"",
""kinesis:PutRecord"",
""kinesis:SplitShard"",
""rds:Describe*"",
""s3:*"",
""sdb:*"",
""sns:*"",
""sqs:*""
]
}]
}
",
});
}
}
package main
import (
"fmt"
"github.com/pulumi/pulumi-aws/sdk/v2/go/aws/ec2"
"github.com/pulumi/pulumi-aws/sdk/v2/go/aws/emr"
"github.com/pulumi/pulumi-aws/sdk/v2/go/aws/iam"
"github.com/pulumi/pulumi/sdk/v2/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
mainVpc, err := ec2.NewVpc(ctx, "mainVpc", &ec2.VpcArgs{
CidrBlock: pulumi.String("168.31.0.0/16"),
EnableDnsHostnames: pulumi.Bool(true),
Tags: pulumi.StringMap{
"name": pulumi.String("emr_test"),
},
})
if err != nil {
return err
}
mainSubnet, err := ec2.NewSubnet(ctx, "mainSubnet", &ec2.SubnetArgs{
VpcId: mainVpc.ID(),
CidrBlock: pulumi.String("168.31.0.0/20"),
Tags: pulumi.StringMap{
"name": pulumi.String("emr_test"),
},
})
if err != nil {
return err
}
iamEmrServiceRole, err := iam.NewRole(ctx, "iamEmrServiceRole", &iam.RoleArgs{
AssumeRolePolicy: pulumi.String(fmt.Sprintf("%v%v%v%v%v%v%v%v%v%v%v%v%v", "{\n", " \"Version\": \"2008-10-17\",\n", " \"Statement\": [\n", " {\n", " \"Sid\": \"\",\n", " \"Effect\": \"Allow\",\n", " \"Principal\": {\n", " \"Service\": \"elasticmapreduce.amazonaws.com\"\n", " },\n", " \"Action\": \"sts:AssumeRole\"\n", " }\n", " ]\n", "}\n")),
})
if err != nil {
return err
}
iamEmrProfileRole, err := iam.NewRole(ctx, "iamEmrProfileRole", &iam.RoleArgs{
AssumeRolePolicy: pulumi.String(fmt.Sprintf("%v%v%v%v%v%v%v%v%v%v%v%v%v", "{\n", " \"Version\": \"2008-10-17\",\n", " \"Statement\": [\n", " {\n", " \"Sid\": \"\",\n", " \"Effect\": \"Allow\",\n", " \"Principal\": {\n", " \"Service\": \"ec2.amazonaws.com\"\n", " },\n", " \"Action\": \"sts:AssumeRole\"\n", " }\n", " ]\n", "}\n")),
})
if err != nil {
return err
}
// Instance profile that wraps the EC2 role for the cluster's instances.
// Uses the singular Role argument; the Roles list is deprecated.
emrProfile, err := iam.NewInstanceProfile(ctx, "emrProfile", &iam.InstanceProfileArgs{
	Role: iamEmrProfileRole.Name,
})
if err != nil {
	return err
}
_, err = emr.NewCluster(ctx, "cluster", &emr.ClusterArgs{
ReleaseLabel: pulumi.String("emr-4.6.0"),
Applications: pulumi.StringArray{
pulumi.String("Spark"),
},
Ec2Attributes: &emr.ClusterEc2AttributesArgs{
SubnetId: mainSubnet.ID(),
EmrManagedMasterSecurityGroup: pulumi.String(aws_security_group.Allow_all.Id),
EmrManagedSlaveSecurityGroup: pulumi.String(aws_security_group.Allow_all.Id),
InstanceProfile: emrProfile.Arn,
},
MasterInstanceType: pulumi.String("m5.xlarge"),
CoreInstanceType: pulumi.String("m5.xlarge"),
CoreInstanceCount: pulumi.Int(1),
Tags: pulumi.StringMap{
"role": pulumi.String("rolename"),
"dns_zone": pulumi.String("env_zone"),
"env": pulumi.String("env"),
"name": pulumi.String("name-env"),
},
BootstrapActions: emr.ClusterBootstrapActionArray{
&emr.ClusterBootstrapActionArgs{
Path: pulumi.String("s3://elasticmapreduce/bootstrap-actions/run-if"),
Name: pulumi.String("runif"),
Args: pulumi.StringArray{
pulumi.String("instance.isMaster=true"),
pulumi.String("echo running on master node"),
},
},
},
ConfigurationsJson: pulumi.String(fmt.Sprintf("%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v", " [\n", " {\n", " \"Classification\": \"hadoop-env\",\n", " \"Configurations\": [\n", " {\n", " \"Classification\": \"export\",\n", " \"Properties\": {\n", " \"JAVA_HOME\": \"/usr/lib/jvm/java-1.8.0\"\n", " }\n", " }\n", " ],\n", " \"Properties\": {}\n", " },\n", " {\n", " \"Classification\": \"spark-env\",\n", " \"Configurations\": [\n", " {\n", " \"Classification\": \"export\",\n", " \"Properties\": {\n", " \"JAVA_HOME\": \"/usr/lib/jvm/java-1.8.0\"\n", " }\n", " }\n", " ],\n", " \"Properties\": {}\n", " }\n", " ]\n")),
ServiceRole: iamEmrServiceRole.Arn,
})
if err != nil {
return err
}
_, err = ec2.NewSecurityGroup(ctx, "allowAccess", &ec2.SecurityGroupArgs{
Description: pulumi.String("Allow inbound traffic"),
VpcId: mainVpc.ID(),
Ingress: ec2.SecurityGroupIngressArray{
&ec2.SecurityGroupIngressArgs{
FromPort: pulumi.Int(0),
ToPort: pulumi.Int(0),
Protocol: pulumi.String("-1"),
CidrBlocks: mainVpc.CidrBlock,
},
},
Egress: ec2.SecurityGroupEgressArray{
&ec2.SecurityGroupEgressArgs{
FromPort: pulumi.Int(0),
ToPort: pulumi.Int(0),
Protocol: pulumi.String("-1"),
CidrBlocks: pulumi.StringArray{
pulumi.String("0.0.0.0/0"),
},
},
},
Tags: pulumi.StringMap{
"name": pulumi.String("emr_test"),
},
}, pulumi.DependsOn([]pulumi.Resource{
"aws_subnet.main",
}))
if err != nil {
return err
}
gw, err := ec2.NewInternetGateway(ctx, "gw", &ec2.InternetGatewayArgs{
VpcId: mainVpc.ID(),
})
if err != nil {
return err
}
routeTable, err := ec2.NewRouteTable(ctx, "routeTable", &ec2.RouteTableArgs{
VpcId: mainVpc.ID(),
Routes: ec2.RouteTableRouteArray{
&ec2.RouteTableRouteArgs{
CidrBlock: pulumi.String("0.0.0.0/0"),
GatewayId: gw.ID(),
},
},
})
if err != nil {
return err
}
_, err = ec2.NewMainRouteTableAssociation(ctx, "mainRouteTableAssociation", &ec2.MainRouteTableAssociationArgs{
VpcId: mainVpc.ID(),
RouteTableId: routeTable.ID(),
})
if err != nil {
return err
}
_, err = iam.NewRolePolicy(ctx, "iamEmrServicePolicy", &iam.RolePolicyArgs{
Role: iamEmrServiceRole.ID(),
Policy: pulumi.String(fmt.Sprintf("%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v", "{\n", " \"Version\": \"2012-10-17\",\n", " \"Statement\": [{\n", " \"Effect\": \"Allow\",\n", " \"Resource\": \"*\",\n", " \"Action\": [\n", " \"ec2:AuthorizeSecurityGroupEgress\",\n", " \"ec2:AuthorizeSecurityGroupIngress\",\n", " \"ec2:CancelSpotInstanceRequests\",\n", " \"ec2:CreateNetworkInterface\",\n", " \"ec2:CreateSecurityGroup\",\n", " \"ec2:CreateTags\",\n", " \"ec2:DeleteNetworkInterface\",\n", " \"ec2:DeleteSecurityGroup\",\n", " \"ec2:DeleteTags\",\n", " \"ec2:DescribeAvailabilityZones\",\n", " \"ec2:DescribeAccountAttributes\",\n", " \"ec2:DescribeDhcpOptions\",\n", " \"ec2:DescribeInstanceStatus\",\n", " \"ec2:DescribeInstances\",\n", " \"ec2:DescribeKeyPairs\",\n", " \"ec2:DescribeNetworkAcls\",\n", " \"ec2:DescribeNetworkInterfaces\",\n", " \"ec2:DescribePrefixLists\",\n", " \"ec2:DescribeRouteTables\",\n", " \"ec2:DescribeSecurityGroups\",\n", " \"ec2:DescribeSpotInstanceRequests\",\n", " \"ec2:DescribeSpotPriceHistory\",\n", " \"ec2:DescribeSubnets\",\n", " \"ec2:DescribeVpcAttribute\",\n", " \"ec2:DescribeVpcEndpoints\",\n", " \"ec2:DescribeVpcEndpointServices\",\n", " \"ec2:DescribeVpcs\",\n", " \"ec2:DetachNetworkInterface\",\n", " \"ec2:ModifyImageAttribute\",\n", " \"ec2:ModifyInstanceAttribute\",\n", " \"ec2:RequestSpotInstances\",\n", " \"ec2:RevokeSecurityGroupEgress\",\n", " \"ec2:RunInstances\",\n", " \"ec2:TerminateInstances\",\n", " \"ec2:DeleteVolume\",\n", " \"ec2:DescribeVolumeStatus\",\n", " \"ec2:DescribeVolumes\",\n", " \"ec2:DetachVolume\",\n", " \"iam:GetRole\",\n", " \"iam:GetRolePolicy\",\n", " \"iam:ListInstanceProfiles\",\n", " \"iam:ListRolePolicies\",\n", " \"iam:PassRole\",\n", " \"s3:CreateBucket\",\n", " \"s3:Get*\",\n", " \"s3:List*\",\n", " \"sdb:BatchPutAttributes\",\n", " \"sdb:Select\",\n", " \"sqs:CreateQueue\",\n", " \"sqs:Delete*\",\n", " 
\"sqs:GetQueue*\",\n", " \"sqs:PurgeQueue\",\n", " \"sqs:ReceiveMessage\"\n", " ]\n", " }]\n", "}\n")),
})
if err != nil {
return err
}
_, err = iam.NewRolePolicy(ctx, "iamEmrProfilePolicy", &iam.RolePolicyArgs{
Role: iamEmrProfileRole.ID(),
Policy: pulumi.String(fmt.Sprintf("%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v", "{\n", " \"Version\": \"2012-10-17\",\n", " \"Statement\": [{\n", " \"Effect\": \"Allow\",\n", " \"Resource\": \"*\",\n", " \"Action\": [\n", " \"cloudwatch:*\",\n", " \"dynamodb:*\",\n", " \"ec2:Describe*\",\n", " \"elasticmapreduce:Describe*\",\n", " \"elasticmapreduce:ListBootstrapActions\",\n", " \"elasticmapreduce:ListClusters\",\n", " \"elasticmapreduce:ListInstanceGroups\",\n", " \"elasticmapreduce:ListInstances\",\n", " \"elasticmapreduce:ListSteps\",\n", " \"kinesis:CreateStream\",\n", " \"kinesis:DeleteStream\",\n", " \"kinesis:DescribeStream\",\n", " \"kinesis:GetRecords\",\n", " \"kinesis:GetShardIterator\",\n", " \"kinesis:MergeShards\",\n", " \"kinesis:PutRecord\",\n", " \"kinesis:SplitShard\",\n", " \"rds:Describe*\",\n", " \"s3:*\",\n", " \"sdb:*\",\n", " \"sns:*\",\n", " \"sqs:*\"\n", " ]\n", " }]\n", "}\n")),
})
if err != nil {
return err
}
return nil
})
}
Example Usage
using Pulumi;
using Aws = Pulumi.Aws;
class MyStack : Stack
{
public MyStack()
{
var cluster = new Aws.Emr.Cluster("cluster", new Aws.Emr.ClusterArgs
{
AdditionalInfo = @"{
""instanceAwsClientConfiguration"": {
""proxyPort"": 8099,
""proxyHost"": ""myproxy.example.com""
}
}
",
Applications =
{
"Spark",
},
BootstrapActions =
{
new Aws.Emr.Inputs.ClusterBootstrapActionArgs
{
Args =
{
"instance.isMaster=true",
"echo running on master node",
},
Name = "runif",
Path = "s3://elasticmapreduce/bootstrap-actions/run-if",
},
},
ConfigurationsJson = @" [
{
""Classification"": ""hadoop-env"",
""Configurations"": [
{
""Classification"": ""export"",
""Properties"": {
""JAVA_HOME"": ""/usr/lib/jvm/java-1.8.0""
}
}
],
""Properties"": {}
},
{
""Classification"": ""spark-env"",
""Configurations"": [
{
""Classification"": ""export"",
""Properties"": {
""JAVA_HOME"": ""/usr/lib/jvm/java-1.8.0""
}
}
],
""Properties"": {}
}
]
",
CoreInstanceGroup = new Aws.Emr.Inputs.ClusterCoreInstanceGroupArgs
{
AutoscalingPolicy = @"{
""Constraints"": {
""MinCapacity"": 1,
""MaxCapacity"": 2
},
""Rules"": [
{
""Name"": ""ScaleOutMemoryPercentage"",
""Description"": ""Scale out if YARNMemoryAvailablePercentage is less than 15"",
""Action"": {
""SimpleScalingPolicyConfiguration"": {
""AdjustmentType"": ""CHANGE_IN_CAPACITY"",
""ScalingAdjustment"": 1,
""CoolDown"": 300
}
},
""Trigger"": {
""CloudWatchAlarmDefinition"": {
""ComparisonOperator"": ""LESS_THAN"",
""EvaluationPeriods"": 1,
""MetricName"": ""YARNMemoryAvailablePercentage"",
""Namespace"": ""AWS/ElasticMapReduce"",
""Period"": 300,
""Statistic"": ""AVERAGE"",
""Threshold"": 15.0,
""Unit"": ""PERCENT""
}
}
}
]
}
",
BidPrice = "0.30",
EbsConfigs =
{
new Aws.Emr.Inputs.ClusterCoreInstanceGroupEbsConfigArgs
{
Size = 40,
Type = "gp2",
VolumesPerInstance = 1,
},
},
InstanceCount = 1,
InstanceType = "c4.large",
},
EbsRootVolumeSize = 100,
Ec2Attributes = new Aws.Emr.Inputs.ClusterEc2AttributesArgs
{
EmrManagedMasterSecurityGroup = aws_security_group.Sg.Id,
EmrManagedSlaveSecurityGroup = aws_security_group.Sg.Id,
InstanceProfile = aws_iam_instance_profile.Emr_profile.Arn,
SubnetId = aws_subnet.Main.Id,
},
KeepJobFlowAliveWhenNoSteps = true,
MasterInstanceGroup = new Aws.Emr.Inputs.ClusterMasterInstanceGroupArgs
{
InstanceType = "m4.large",
},
ReleaseLabel = "emr-4.6.0",
ServiceRole = aws_iam_role.Iam_emr_service_role.Arn,
Tags =
{
{ "env", "env" },
{ "role", "rolename" },
},
TerminationProtection = false,
});
}
}
package main
import (
"fmt"
"github.com/pulumi/pulumi-aws/sdk/v2/go/aws/emr"
"github.com/pulumi/pulumi/sdk/v2/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := emr.NewCluster(ctx, "cluster", &emr.ClusterArgs{
AdditionalInfo: pulumi.String(fmt.Sprintf("%v%v%v%v%v%v%v", "{\n", " \"instanceAwsClientConfiguration\": {\n", " \"proxyPort\": 8099,\n", " \"proxyHost\": \"myproxy.example.com\"\n", " }\n", "}\n", "\n")),
Applications: pulumi.StringArray{
pulumi.String("Spark"),
},
BootstrapActions: emr.ClusterBootstrapActionArray{
&emr.ClusterBootstrapActionArgs{
Args: pulumi.StringArray{
pulumi.String("instance.isMaster=true"),
pulumi.String("echo running on master node"),
},
Name: pulumi.String("runif"),
Path: pulumi.String("s3://elasticmapreduce/bootstrap-actions/run-if"),
},
},
ConfigurationsJson: pulumi.String(fmt.Sprintf("%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v", " [\n", " {\n", " \"Classification\": \"hadoop-env\",\n", " \"Configurations\": [\n", " {\n", " \"Classification\": \"export\",\n", " \"Properties\": {\n", " \"JAVA_HOME\": \"/usr/lib/jvm/java-1.8.0\"\n", " }\n", " }\n", " ],\n", " \"Properties\": {}\n", " },\n", " {\n", " \"Classification\": \"spark-env\",\n", " \"Configurations\": [\n", " {\n", " \"Classification\": \"export\",\n", " \"Properties\": {\n", " \"JAVA_HOME\": \"/usr/lib/jvm/java-1.8.0\"\n", " }\n", " }\n", " ],\n", " \"Properties\": {}\n", " }\n", " ]\n", "\n")),
CoreInstanceGroup: &emr.ClusterCoreInstanceGroupArgs{
AutoscalingPolicy: pulumi.String(fmt.Sprintf("%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v", "{\n", "\"Constraints\": {\n", " \"MinCapacity\": 1,\n", " \"MaxCapacity\": 2\n", "},\n", "\"Rules\": [\n", " {\n", " \"Name\": \"ScaleOutMemoryPercentage\",\n", " \"Description\": \"Scale out if YARNMemoryAvailablePercentage is less than 15\",\n", " \"Action\": {\n", " \"SimpleScalingPolicyConfiguration\": {\n", " \"AdjustmentType\": \"CHANGE_IN_CAPACITY\",\n", " \"ScalingAdjustment\": 1,\n", " \"CoolDown\": 300\n", " }\n", " },\n", " \"Trigger\": {\n", " \"CloudWatchAlarmDefinition\": {\n", " \"ComparisonOperator\": \"LESS_THAN\",\n", " \"EvaluationPeriods\": 1,\n", " \"MetricName\": \"YARNMemoryAvailablePercentage\",\n", " \"Namespace\": \"AWS/ElasticMapReduce\",\n", " \"Period\": 300,\n", " \"Statistic\": \"AVERAGE\",\n", " \"Threshold\": 15.0,\n", " \"Unit\": \"PERCENT\"\n", " }\n", " }\n", " }\n", "]\n", "}\n", "\n")),
BidPrice: pulumi.String("0.30"),
EbsConfigs: emr.ClusterCoreInstanceGroupEbsConfigArray{
&emr.ClusterCoreInstanceGroupEbsConfigArgs{
Size: pulumi.Int(40),
Type: pulumi.String("gp2"),
VolumesPerInstance: pulumi.Int(1),
},
},
InstanceCount: pulumi.Int(1),
InstanceType: pulumi.String("c4.large"),
},
EbsRootVolumeSize: pulumi.Int(100),
Ec2Attributes: &emr.ClusterEc2AttributesArgs{
EmrManagedMasterSecurityGroup: pulumi.String(aws_security_group.Sg.Id),
EmrManagedSlaveSecurityGroup: pulumi.String(aws_security_group.Sg.Id),
InstanceProfile: pulumi.String(aws_iam_instance_profile.Emr_profile.Arn),
SubnetId: pulumi.String(aws_subnet.Main.Id),
},
KeepJobFlowAliveWhenNoSteps: pulumi.Bool(true),
MasterInstanceGroup: &emr.ClusterMasterInstanceGroupArgs{
InstanceType: pulumi.String("m4.large"),
},
ReleaseLabel: pulumi.String("emr-4.6.0"),
ServiceRole: pulumi.String(aws_iam_role.Iam_emr_service_role.Arn),
Tags: pulumi.StringMap{
"env": pulumi.String("env"),
"role": pulumi.String("rolename"),
},
TerminationProtection: pulumi.Bool(false),
})
if err != nil {
return err
}
return nil
})
}
import pulumi
import pulumi_aws as aws
cluster = aws.emr.Cluster("cluster",
additional_info="""{
"instanceAwsClientConfiguration": {
"proxyPort": 8099,
"proxyHost": "myproxy.example.com"
}
}
""",
applications=["Spark"],
bootstrap_actions=[{
"args": [
"instance.isMaster=true",
"echo running on master node",
],
"name": "runif",
"path": "s3://elasticmapreduce/bootstrap-actions/run-if",
}],
configurations_json=""" [
{
"Classification": "hadoop-env",
"Configurations": [
{
"Classification": "export",
"Properties": {
"JAVA_HOME": "/usr/lib/jvm/java-1.8.0"
}
}
],
"Properties": {}
},
{
"Classification": "spark-env",
"Configurations": [
{
"Classification": "export",
"Properties": {
"JAVA_HOME": "/usr/lib/jvm/java-1.8.0"
}
}
],
"Properties": {}
}
]
""",
core_instance_group={
"autoscaling_policy": """{
"Constraints": {
"MinCapacity": 1,
"MaxCapacity": 2
},
"Rules": [
{
"Name": "ScaleOutMemoryPercentage",
"Description": "Scale out if YARNMemoryAvailablePercentage is less than 15",
"Action": {
"SimpleScalingPolicyConfiguration": {
"AdjustmentType": "CHANGE_IN_CAPACITY",
"ScalingAdjustment": 1,
"CoolDown": 300
}
},
"Trigger": {
"CloudWatchAlarmDefinition": {
"ComparisonOperator": "LESS_THAN",
"EvaluationPeriods": 1,
"MetricName": "YARNMemoryAvailablePercentage",
"Namespace": "AWS/ElasticMapReduce",
"Period": 300,
"Statistic": "AVERAGE",
"Threshold": 15.0,
"Unit": "PERCENT"
}
}
}
]
}
""",
"bid_price": "0.30",
"ebs_configs": [{
"size": "40",
"type": "gp2",
"volumesPerInstance": 1,
}],
"instance_count": 1,
"instance_type": "c4.large",
},
ebs_root_volume_size=100,
ec2_attributes={
"emrManagedMasterSecurityGroup": aws_security_group["sg"]["id"],
"emrManagedSlaveSecurityGroup": aws_security_group["sg"]["id"],
"instanceProfile": aws_iam_instance_profile["emr_profile"]["arn"],
"subnet_id": aws_subnet["main"]["id"],
},
keep_job_flow_alive_when_no_steps=True,
master_instance_group={
"instance_type": "m4.large",
},
release_label="emr-4.6.0",
service_role=aws_iam_role["iam_emr_service_role"]["arn"],
tags={
"env": "env",
"role": "rolename",
},
termination_protection=False)import * as pulumi from "@pulumi/pulumi";
import * as aws from "@pulumi/aws";
// Extra-feature JSON: routes the instances' AWS client through a proxy host/port.
const clusterAdditionalInfo = `{
"instanceAwsClientConfiguration": {
"proxyPort": 8099,
"proxyHost": "myproxy.example.com"
}
}
`;
// Configuration list: exports JAVA_HOME for both the hadoop-env and spark-env classifications.
const clusterConfigurationsJson = ` [
{
"Classification": "hadoop-env",
"Configurations": [
{
"Classification": "export",
"Properties": {
"JAVA_HOME": "/usr/lib/jvm/java-1.8.0"
}
}
],
"Properties": {}
},
{
"Classification": "spark-env",
"Configurations": [
{
"Classification": "export",
"Properties": {
"JAVA_HOME": "/usr/lib/jvm/java-1.8.0"
}
}
],
"Properties": {}
}
]
`;
// Autoscaling policy for the core group: capacity stays between 1 and 2 instances and one
// instance is added when YARNMemoryAvailablePercentage is less than 15.
const coreAutoscalingPolicy = `{
"Constraints": {
"MinCapacity": 1,
"MaxCapacity": 2
},
"Rules": [
{
"Name": "ScaleOutMemoryPercentage",
"Description": "Scale out if YARNMemoryAvailablePercentage is less than 15",
"Action": {
"SimpleScalingPolicyConfiguration": {
"AdjustmentType": "CHANGE_IN_CAPACITY",
"ScalingAdjustment": 1,
"CoolDown": 300
}
},
"Trigger": {
"CloudWatchAlarmDefinition": {
"ComparisonOperator": "LESS_THAN",
"EvaluationPeriods": 1,
"MetricName": "YARNMemoryAvailablePercentage",
"Namespace": "AWS/ElasticMapReduce",
"Period": 300,
"Statistic": "AVERAGE",
"Threshold": 15.0,
"Unit": "PERCENT"
}
}
}
]
}
`;
// EMR cluster that uses instance groups for the master and core node types.
const cluster = new aws.emr.Cluster("cluster", {
    releaseLabel: "emr-4.6.0",
    applications: ["Spark"],
    serviceRole: aws_iam_role_iam_emr_service_role.arn,
    additionalInfo: clusterAdditionalInfo,
    configurationsJson: clusterConfigurationsJson,
    // Bootstrap action executed through the run-if helper with an `instance.isMaster=true` condition.
    bootstrapActions: [{
        name: "runif",
        path: "s3://elasticmapreduce/bootstrap-actions/run-if",
        args: [
            "instance.isMaster=true",
            "echo running on master node",
        ],
    }],
    masterInstanceGroup: {
        instanceType: "m4.large",
    },
    coreInstanceGroup: {
        instanceType: "c4.large",
        instanceCount: 1,
        bidPrice: "0.30",
        ebsConfigs: [{
            type: "gp2",
            size: 40,
            volumesPerInstance: 1,
        }],
        autoscalingPolicy: coreAutoscalingPolicy,
    },
    ebsRootVolumeSize: 100,
    ec2Attributes: {
        subnetId: aws_subnet_main.id,
        instanceProfile: aws_iam_instance_profile_emr_profile.arn,
        emrManagedMasterSecurityGroup: aws_security_group_sg.id,
        emrManagedSlaveSecurityGroup: aws_security_group_sg.id,
    },
    keepJobFlowAliveWhenNoSteps: true,
    terminationProtection: false,
    tags: {
        env: "env",
        role: "rolename",
    },
});
Enable Debug Logging
Coming soon!
Coming soon!
import pulumi
import pulumi_aws as aws
example = aws.emr.Cluster("example",
lifecycle={
"ignoreChanges": [
"stepConcurrencyLevel",
"steps",
],
},
steps=[{
"actionOnFailure": "TERMINATE_CLUSTER",
"hadoopJarStep": {
"args": ["state-pusher-script"],
"jar": "command-runner.jar",
},
"name": "Setup Hadoop Debugging",
}])import * as pulumi from "@pulumi/pulumi";
import * as aws from "@pulumi/aws";
// Step that runs `state-pusher-script` through command-runner.jar ("Setup Hadoop Debugging").
const debuggingStep = {
    name: "Setup Hadoop Debugging",
    actionOnFailure: "TERMINATE_CLUSTER",
    hadoopJarStep: {
        jar: "command-runner.jar",
        args: ["state-pusher-script"],
    },
};
// Changes to the step list and the step concurrency level are ignored after creation.
const example = new aws.emr.Cluster(
    "example",
    { steps: [debuggingStep] },
    { ignoreChanges: ["stepConcurrencyLevel", "steps"] }
);
Multiple Node Master Instance Group
using Pulumi;
using Aws = Pulumi.Aws;
/// <summary>
/// Stack that creates a subnet and an EMR cluster whose master instance group has three nodes.
/// </summary>
class MyStack : Stack
{
    public MyStack()
    {
        // Map public IP on launch must be enabled for public (Internet accessible) subnets
        var exampleSubnet = new Aws.Ec2.Subnet("exampleSubnet", new Aws.Ec2.SubnetArgs
        {
            MapPublicIpOnLaunch = true,
        });
        var exampleCluster = new Aws.Emr.Cluster("exampleCluster", new Aws.Emr.ClusterArgs
        {
            // The core instance group must be configured; it is left empty in this example
            CoreInstanceGroup = new Aws.Emr.Inputs.ClusterCoreInstanceGroupArgs(),
            Ec2Attributes = new Aws.Emr.Inputs.ClusterEc2AttributesArgs
            {
                SubnetId = exampleSubnet.Id,
            },
            MasterInstanceGroup = new Aws.Emr.Inputs.ClusterMasterInstanceGroupArgs
            {
                // Master instance count must be set to 3
                InstanceCount = 3,
            },
            // EMR version must be 5.23.0 or later
            ReleaseLabel = "emr-5.24.1",
            // Termination protection is automatically enabled for multiple masters
            // To destroy the cluster, this must be configured to false and applied first
            TerminationProtection = true,
        });
    }
}
package main
import (
"github.com/pulumi/pulumi-aws/sdk/v2/go/aws/ec2"
"github.com/pulumi/pulumi-aws/sdk/v2/go/aws/emr"
"github.com/pulumi/pulumi/sdk/v2/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
exampleSubnet, err := ec2.NewSubnet(ctx, "exampleSubnet", &ec2.SubnetArgs{
MapPublicIpOnLaunch: pulumi.Bool(true),
})
if err != nil {
return err
}
_, err = emr.NewCluster(ctx, "exampleCluster", &emr.ClusterArgs{
CoreInstanceGroup: nil,
Ec2Attributes: &emr.ClusterEc2AttributesArgs{
SubnetId: exampleSubnet.ID(),
},
MasterInstanceGroup: &emr.ClusterMasterInstanceGroupArgs{
InstanceCount: pulumi.Int(3),
},
ReleaseLabel: pulumi.String("emr-5.24.1"),
TerminationProtection: pulumi.Bool(true),
})
if err != nil {
return err
}
return nil
})
}import pulumi
import pulumi_aws as aws
# Map public IP on launch must be enabled for public (Internet accessible) subnets
example_subnet = aws.ec2.Subnet("exampleSubnet", map_public_ip_on_launch=True)
example_cluster = aws.emr.Cluster("exampleCluster",
core_instance_group={},
ec2_attributes={
"subnet_id": example_subnet.id,
},
master_instance_group={
"instance_count": 3,
},
release_label="emr-5.24.1",
termination_protection=True)import * as pulumi from "@pulumi/pulumi";
import * as aws from "@pulumi/aws";
// Public (Internet accessible) subnets need map-public-IP-on-launch enabled.
const exampleSubnet = new aws.ec2.Subnet("example", { mapPublicIpOnLaunch: true });
// Arguments for an EMR cluster that runs multiple master nodes.
const exampleClusterArgs = {
    // Multiple masters require EMR 5.23.0 or later.
    releaseLabel: "emr-5.24.1",
    ec2Attributes: {
        subnetId: exampleSubnet.id,
    },
    masterInstanceGroup: {
        // The master instance count has to be exactly 3.
        instanceCount: 3,
    },
    // A core instance group has to be configured as well.
    coreInstanceGroup: {},
    // Termination protection is enabled automatically for multiple masters; to destroy
    // the cluster, first set this to false and apply that change.
    terminationProtection: true,
};
const exampleCluster = new aws.emr.Cluster("example", exampleClusterArgs);
Create a Cluster Resource
new Cluster(name: string, args: ClusterArgs, opts?: CustomResourceOptions);
def Cluster(resource_name, opts=None, additional_info=None, applications=None, autoscaling_role=None, bootstrap_actions=None, configurations=None, configurations_json=None, core_instance_count=None, core_instance_group=None, core_instance_type=None, custom_ami_id=None, ebs_root_volume_size=None, ec2_attributes=None, instance_groups=None, keep_job_flow_alive_when_no_steps=None, kerberos_attributes=None, log_uri=None, master_instance_group=None, master_instance_type=None, name=None, release_label=None, scale_down_behavior=None, security_configuration=None, service_role=None, step_concurrency_level=None, steps=None, tags=None, termination_protection=None, visible_to_all_users=None, __props__=None);
func NewCluster(ctx *Context, name string, args ClusterArgs, opts ...ResourceOption) (*Cluster, error)
public Cluster(string name, ClusterArgs args, CustomResourceOptions? opts = null)
- name string
- The unique name of the resource.
- args ClusterArgs
- The arguments to resource properties.
- opts CustomResourceOptions
- Bag of options to control resource's behavior.
- resource_name str
- The unique name of the resource.
- opts ResourceOptions
- A bag of options that control this resource's behavior.
- ctx Context
- Context object for the current deployment.
- name string
- The unique name of the resource.
- args ClusterArgs
- The arguments to resource properties.
- opts ResourceOption
- Bag of options to control resource's behavior.
- name string
- The unique name of the resource.
- args ClusterArgs
- The arguments to resource properties.
- opts CustomResourceOptions
- Bag of options to control resource's behavior.
Cluster Resource Properties
To learn more about resource properties and how to use them, see Inputs and Outputs in the Programming Model docs.
Inputs
The Cluster resource accepts the following input properties:
- Release
Label string The release label for the Amazon EMR release
- Service
Role string IAM role that will be assumed by the Amazon EMR service to access AWS resources
- Additional
Info string A JSON string for selecting additional features such as adding proxy information. Note: Currently there is no API to retrieve the value of this argument after EMR cluster creation from provider, therefore this provider cannot detect drift from the actual EMR cluster if its value is changed outside this provider.
- Applications List<string>
A list of applications for the cluster. Valid values are:
Flink,Hadoop,Hive,Mahout,Pig,Spark, andJupyterHub(as of EMR 5.14.0). Case insensitive- Autoscaling
Role string An IAM role for automatic scaling policies. The IAM role provides permissions that the automatic scaling feature requires to launch and terminate EC2 instances in an instance group.
- Bootstrap
Actions List<ClusterBootstrap Action Args> Ordered list of bootstrap actions that will be run before Hadoop is started on the cluster nodes. Defined below.
- Configurations string
List of configurations supplied for the EMR cluster you are creating
- Configurations
Json string A JSON string for supplying list of configurations for the EMR cluster.
- Core
Instance intCount Use the
core_instance_groupconfiguration blockinstance_countargument instead. Number of Amazon EC2 instances used to execute the job flow. EMR will use one node as the cluster’s master node and use the remainder of the nodes (core_instance_count-1) as core nodes. Cannot be specified ifcore_instance_grouporinstance_groupconfiguration blocks are set. Default1- Core
Instance ClusterGroup Core Instance Group Args Configuration block to use an Instance Group for the core node type. Cannot be specified if
core_instance_countargument,core_instance_typeargument, orinstance_groupconfiguration blocks are set. Detailed below.- Core
Instance stringType Use the
core_instance_groupconfiguration blockinstance_typeargument instead. The EC2 instance type of the slave nodes. Cannot be specified ifcore_instance_grouporinstance_groupconfiguration blocks are set.- Custom
Ami stringId A custom Amazon Linux AMI for the cluster (instead of an EMR-owned AMI). Available in Amazon EMR version 5.7.0 and later.
- Ebs
Root intVolume Size Size in GiB of the EBS root device volume of the Linux AMI that is used for each EC2 instance. Available in Amazon EMR version 4.x and later.
- Ec2Attributes
Cluster
Ec2Attributes Args Attributes for the EC2 instances running the job flow. Defined below
- Instance
Groups List<ClusterInstance Group Args> Use the
master_instance_groupconfiguration block,core_instance_groupconfiguration block andaws.emr.InstanceGroupresource(s) instead. A list ofinstance_groupobjects for each instance group in the cluster. Exactly one ofmaster_instance_typeandinstance_groupmust be specified. Ifinstance_groupis set, then it must contain a configuration block for at least theMASTERinstance group type (as well as any additional instance groups). Cannot be specified ifmaster_instance_grouporcore_instance_groupconfiguration blocks are set. Defined below- Keep
Job boolFlow Alive When No Steps Switch on/off run cluster with no steps or when all steps are complete (default is on)
- Kerberos
Attributes ClusterKerberos Attributes Args Kerberos configuration for the cluster. Defined below
- Log
Uri string S3 bucket to write the log files of the job flow. If a value is not provided, logs are not created
- Master
Instance ClusterGroup Master Instance Group Args Configuration block to use an Instance Group for the master node type. Cannot be specified if
master_instance_typeargument orinstance_groupconfiguration blocks are set. Detailed below.- Master
Instance stringType Use the
master_instance_groupconfiguration blockinstance_typeargument instead. The EC2 instance type of the master node. Cannot be specified ifmaster_instance_grouporinstance_groupconfiguration blocks are set.- Name string
The name of the job flow.
- Scale
Down stringBehavior The way that individual Amazon EC2 instances terminate when an automatic scale-in activity occurs or an
instance groupis resized.- Security
Configuration string The security configuration name to attach to the EMR cluster. Only valid for EMR clusters with
release_label4.8.0 or greater- Step
Concurrency intLevel The number of steps that can be executed concurrently. You can specify a maximum of 256 steps. Only valid for EMR clusters with
release_label5.28.0 or greater. (default is 1)- Steps
List<Cluster
Step Args> List of steps to run when creating the cluster. Defined below. It is highly recommended to utilize
`ignoreChanges` if other steps are being managed outside of this provider.
- Tags Dictionary<string, string>
List of tags to apply to the EMR Cluster.
- Termination
Protection bool Switch on/off termination protection (default is
false, except when using multiple master nodes). Before attempting to destroy the resource when termination protection is enabled, this configuration must be applied with its value set tofalse.- Visible
To boolAll Users Whether the job flow is visible to all IAM users of the AWS account associated with the job flow. Default
true
- Release
Label string The release label for the Amazon EMR release
- Service
Role string IAM role that will be assumed by the Amazon EMR service to access AWS resources
- Additional
Info string A JSON string for selecting additional features such as adding proxy information. Note: Currently there is no API to retrieve the value of this argument after EMR cluster creation from provider, therefore this provider cannot detect drift from the actual EMR cluster if its value is changed outside this provider.
- Applications []string
A list of applications for the cluster. Valid values are:
Flink,Hadoop,Hive,Mahout,Pig,Spark, andJupyterHub(as of EMR 5.14.0). Case insensitive- Autoscaling
Role string An IAM role for automatic scaling policies. The IAM role provides permissions that the automatic scaling feature requires to launch and terminate EC2 instances in an instance group.
- Bootstrap
Actions []ClusterBootstrap Action Ordered list of bootstrap actions that will be run before Hadoop is started on the cluster nodes. Defined below.
- Configurations string
List of configurations supplied for the EMR cluster you are creating
- Configurations
Json string A JSON string for supplying list of configurations for the EMR cluster.
- Core
Instance intCount Use the
core_instance_groupconfiguration blockinstance_countargument instead. Number of Amazon EC2 instances used to execute the job flow. EMR will use one node as the cluster’s master node and use the remainder of the nodes (core_instance_count-1) as core nodes. Cannot be specified ifcore_instance_grouporinstance_groupconfiguration blocks are set. Default1- Core
Instance ClusterGroup Core Instance Group Configuration block to use an Instance Group for the core node type. Cannot be specified if
core_instance_countargument,core_instance_typeargument, orinstance_groupconfiguration blocks are set. Detailed below.- Core
Instance stringType Use the
core_instance_groupconfiguration blockinstance_typeargument instead. The EC2 instance type of the slave nodes. Cannot be specified ifcore_instance_grouporinstance_groupconfiguration blocks are set.- Custom
Ami stringId A custom Amazon Linux AMI for the cluster (instead of an EMR-owned AMI). Available in Amazon EMR version 5.7.0 and later.
- Ebs
Root intVolume Size Size in GiB of the EBS root device volume of the Linux AMI that is used for each EC2 instance. Available in Amazon EMR version 4.x and later.
- Ec2Attributes
Cluster
Ec2Attributes Attributes for the EC2 instances running the job flow. Defined below
- Instance
Groups []ClusterInstance Group Use the
master_instance_groupconfiguration block,core_instance_groupconfiguration block andaws.emr.InstanceGroupresource(s) instead. A list ofinstance_groupobjects for each instance group in the cluster. Exactly one ofmaster_instance_typeandinstance_groupmust be specified. Ifinstance_groupis set, then it must contain a configuration block for at least theMASTERinstance group type (as well as any additional instance groups). Cannot be specified ifmaster_instance_grouporcore_instance_groupconfiguration blocks are set. Defined below- Keep
Job boolFlow Alive When No Steps Switch on/off run cluster with no steps or when all steps are complete (default is on)
- Kerberos
Attributes ClusterKerberos Attributes Kerberos configuration for the cluster. Defined below
- Log
Uri string S3 bucket to write the log files of the job flow. If a value is not provided, logs are not created
- Master
Instance ClusterGroup Master Instance Group Configuration block to use an Instance Group for the master node type. Cannot be specified if
master_instance_typeargument orinstance_groupconfiguration blocks are set. Detailed below.- Master
Instance stringType Use the
master_instance_groupconfiguration blockinstance_typeargument instead. The EC2 instance type of the master node. Cannot be specified ifmaster_instance_grouporinstance_groupconfiguration blocks are set.- Name string
The name of the job flow.
- Scale
Down stringBehavior The way that individual Amazon EC2 instances terminate when an automatic scale-in activity occurs or an
instance groupis resized.- Security
Configuration string The security configuration name to attach to the EMR cluster. Only valid for EMR clusters with
release_label4.8.0 or greater- Step
Concurrency intLevel The number of steps that can be executed concurrently. You can specify a maximum of 256 steps. Only valid for EMR clusters with
release_label5.28.0 or greater. (default is 1)- Steps
[]Cluster
Step List of steps to run when creating the cluster. Defined below. It is highly recommended to utilize
`ignoreChanges` if other steps are being managed outside of this provider.
- Tags map[string]string
List of tags to apply to the EMR Cluster.
- Termination
Protection bool Switch on/off termination protection (default is
false, except when using multiple master nodes). Before attempting to destroy the resource when termination protection is enabled, this configuration must be applied with its value set tofalse.- Visible
To boolAll Users Whether the job flow is visible to all IAM users of the AWS account associated with the job flow. Default
true
- release
Label string The release label for the Amazon EMR release
- service
Role string IAM role that will be assumed by the Amazon EMR service to access AWS resources
- additional
Info string A JSON string for selecting additional features such as adding proxy information. Note: Currently there is no API to retrieve the value of this argument after EMR cluster creation from provider, therefore this provider cannot detect drift from the actual EMR cluster if its value is changed outside this provider.
- applications string[]
A list of applications for the cluster. Valid values are:
Flink,Hadoop,Hive,Mahout,Pig,Spark, andJupyterHub(as of EMR 5.14.0). Case insensitive- autoscaling
Role string An IAM role for automatic scaling policies. The IAM role provides permissions that the automatic scaling feature requires to launch and terminate EC2 instances in an instance group.
- bootstrap
Actions ClusterBootstrap Action[] Ordered list of bootstrap actions that will be run before Hadoop is started on the cluster nodes. Defined below.
- configurations string
List of configurations supplied for the EMR cluster you are creating
- configurations
Json string A JSON string for supplying list of configurations for the EMR cluster.
- core
Instance numberCount Use the
core_instance_groupconfiguration blockinstance_countargument instead. Number of Amazon EC2 instances used to execute the job flow. EMR will use one node as the cluster’s master node and use the remainder of the nodes (core_instance_count-1) as core nodes. Cannot be specified ifcore_instance_grouporinstance_groupconfiguration blocks are set. Default1- core
Instance ClusterGroup Core Instance Group Configuration block to use an Instance Group for the core node type. Cannot be specified if
core_instance_countargument,core_instance_typeargument, orinstance_groupconfiguration blocks are set. Detailed below.- core
Instance stringType Use the
core_instance_groupconfiguration blockinstance_typeargument instead. The EC2 instance type of the slave nodes. Cannot be specified ifcore_instance_grouporinstance_groupconfiguration blocks are set.- custom
Ami stringId A custom Amazon Linux AMI for the cluster (instead of an EMR-owned AMI). Available in Amazon EMR version 5.7.0 and later.
- ebs
Root numberVolume Size Size in GiB of the EBS root device volume of the Linux AMI that is used for each EC2 instance. Available in Amazon EMR version 4.x and later.
- ec2Attributes
Cluster
Ec2Attributes Attributes for the EC2 instances running the job flow. Defined below
- instance
Groups ClusterInstance Group[] Use the
master_instance_groupconfiguration block,core_instance_groupconfiguration block andaws.emr.InstanceGroupresource(s) instead. A list ofinstance_groupobjects for each instance group in the cluster. Exactly one ofmaster_instance_typeandinstance_groupmust be specified. Ifinstance_groupis set, then it must contain a configuration block for at least theMASTERinstance group type (as well as any additional instance groups). Cannot be specified ifmaster_instance_grouporcore_instance_groupconfiguration blocks are set. Defined below- keep
Job booleanFlow Alive When No Steps Switch on/off run cluster with no steps or when all steps are complete (default is on)
- kerberos
Attributes ClusterKerberos Attributes Kerberos configuration for the cluster. Defined below
- log
Uri string S3 bucket to write the log files of the job flow. If a value is not provided, logs are not created
- master
Instance ClusterGroup Master Instance Group Configuration block to use an Instance Group for the master node type. Cannot be specified if
master_instance_typeargument orinstance_groupconfiguration blocks are set. Detailed below.- master
Instance stringType Use the
master_instance_groupconfiguration blockinstance_typeargument instead. The EC2 instance type of the master node. Cannot be specified ifmaster_instance_grouporinstance_groupconfiguration blocks are set.- name string
The name of the job flow.
- scale
Down stringBehavior The way that individual Amazon EC2 instances terminate when an automatic scale-in activity occurs or an
instance groupis resized.- security
Configuration string The security configuration name to attach to the EMR cluster. Only valid for EMR clusters with
release_label4.8.0 or greater- step
Concurrency numberLevel The number of steps that can be executed concurrently. You can specify a maximum of 256 steps. Only valid for EMR clusters with
release_label5.28.0 or greater. (default is 1)- steps
Cluster
Step[] List of steps to run when creating the cluster. Defined below. It is highly recommended to utilize
`ignoreChanges` if other steps are being managed outside of this provider.
- tags {[key: string]: string}
List of tags to apply to the EMR Cluster.
- termination
Protection boolean Switch on/off termination protection (default is
false, except when using multiple master nodes). Before attempting to destroy the resource when termination protection is enabled, this configuration must be applied with its value set tofalse.- visible
To booleanAll Users Whether the job flow is visible to all IAM users of the AWS account associated with the job flow. Default
true
- release_
label str The release label for the Amazon EMR release
- service_
role str IAM role that will be assumed by the Amazon EMR service to access AWS resources
- additional_
info str A JSON string for selecting additional features such as adding proxy information. Note: Currently there is no API to retrieve the value of this argument after EMR cluster creation from provider, therefore this provider cannot detect drift from the actual EMR cluster if its value is changed outside this provider.
- applications List[str]
A list of applications for the cluster. Valid values are:
Flink,Hadoop,Hive,Mahout,Pig,Spark, andJupyterHub(as of EMR 5.14.0). Case insensitive- autoscaling_
role str An IAM role for automatic scaling policies. The IAM role provides permissions that the automatic scaling feature requires to launch and terminate EC2 instances in an instance group.
- bootstrap_
actions List[ClusterBootstrap Action] Ordered list of bootstrap actions that will be run before Hadoop is started on the cluster nodes. Defined below.
- configurations str
List of configurations supplied for the EMR cluster you are creating
- configurations_
json str A JSON string for supplying list of configurations for the EMR cluster.
- core_
instance_ floatcount Use the
core_instance_groupconfiguration blockinstance_countargument instead. Number of Amazon EC2 instances used to execute the job flow. EMR will use one node as the cluster’s master node and use the remainder of the nodes (core_instance_count-1) as core nodes. Cannot be specified ifcore_instance_grouporinstance_groupconfiguration blocks are set. Default1- core_
instance_ Dict[Clustergroup Core Instance Group] Configuration block to use an Instance Group for the core node type. Cannot be specified if
core_instance_countargument,core_instance_typeargument, orinstance_groupconfiguration blocks are set. Detailed below.- core_
instance_ strtype Use the
core_instance_groupconfiguration blockinstance_typeargument instead. The EC2 instance type of the slave nodes. Cannot be specified ifcore_instance_grouporinstance_groupconfiguration blocks are set.- custom_
ami_ strid A custom Amazon Linux AMI for the cluster (instead of an EMR-owned AMI). Available in Amazon EMR version 5.7.0 and later.
- ebs_
root_ floatvolume_ size Size in GiB of the EBS root device volume of the Linux AMI that is used for each EC2 instance. Available in Amazon EMR version 4.x and later.
- ec2_
attributes Dict[ClusterEc2Attributes] Attributes for the EC2 instances running the job flow. Defined below
- instance_
groups List[ClusterInstance Group] Use the
master_instance_groupconfiguration block,core_instance_groupconfiguration block andaws.emr.InstanceGroupresource(s) instead. A list ofinstance_groupobjects for each instance group in the cluster. Exactly one ofmaster_instance_typeandinstance_groupmust be specified. Ifinstance_groupis set, then it must contain a configuration block for at least theMASTERinstance group type (as well as any additional instance groups). Cannot be specified ifmaster_instance_grouporcore_instance_groupconfiguration blocks are set. Defined below- keep_
job_ boolflow_ alive_ when_ no_ steps Switch on/off run cluster with no steps or when all steps are complete (default is on)
- kerberos_
attributes Dict[ClusterKerberos Attributes] Kerberos configuration for the cluster. Defined below
- log_
uri str S3 bucket to write the log files of the job flow. If a value is not provided, logs are not created
- master_
instance_ Dict[Clustergroup Master Instance Group] Configuration block to use an Instance Group for the master node type. Cannot be specified if
master_instance_typeargument orinstance_groupconfiguration blocks are set. Detailed below.- master_
instance_ strtype Use the
master_instance_groupconfiguration blockinstance_typeargument instead. The EC2 instance type of the master node. Cannot be specified ifmaster_instance_grouporinstance_groupconfiguration blocks are set.- name str
The name of the job flow.
- scale_
down_ strbehavior The way that individual Amazon EC2 instances terminate when an automatic scale-in activity occurs or an
instance groupis resized.- security_
configuration str The security configuration name to attach to the EMR cluster. Only valid for EMR clusters with
release_label4.8.0 or greater- step_
concurrency_ floatlevel The number of steps that can be executed concurrently. You can specify a maximum of 256 steps. Only valid for EMR clusters with
release_label5.28.0 or greater. (default is 1)- steps
List[Cluster
Step] List of steps to run when creating the cluster. Defined below. It is highly recommended to utilize
`ignoreChanges` if other steps are being managed outside of this provider.
- tags Dict[str, str]
List of tags to apply to the EMR Cluster.
- termination_
protection bool Switch on/off termination protection (default is
false, except when using multiple master nodes). Before attempting to destroy the resource when termination protection is enabled, this configuration must be applied with its value set tofalse.- visible_
to_ boolall_ users Whether the job flow is visible to all IAM users of the AWS account associated with the job flow. Default
true
Outputs
All input properties are implicitly available as output properties. Additionally, the Cluster resource produces the following output properties:
- Arn string
- ClusterState string
- Id string
The provider-assigned unique ID for this managed resource.
- MasterPublicDns string
The public DNS name of the master EC2 instance.
* `core_instance_group.0.id` - Core node type Instance Group ID, if using Instance Group for this node type.
- arn string
- clusterState string
- id string
The provider-assigned unique ID for this managed resource.
- masterPublicDns string
The public DNS name of the master EC2 instance.
* `core_instance_group.0.id` - Core node type Instance Group ID, if using Instance Group for this node type.
- arn str
- cluster_state str
- id str
The provider-assigned unique ID for this managed resource.
- master_public_dns str
The public DNS name of the master EC2 instance.
* `core_instance_group.0.id` - Core node type Instance Group ID, if using Instance Group for this node type.
Look up an Existing Cluster Resource
Get an existing Cluster resource’s state with the given name, ID, and optional extra properties used to qualify the lookup.
public static get(name: string, id: Input<ID>, state?: ClusterState, opts?: CustomResourceOptions): Cluster
static get(resource_name, id, opts=None, additional_info=None, applications=None, arn=None, autoscaling_role=None, bootstrap_actions=None, cluster_state=None, configurations=None, configurations_json=None, core_instance_count=None, core_instance_group=None, core_instance_type=None, custom_ami_id=None, ebs_root_volume_size=None, ec2_attributes=None, instance_groups=None, keep_job_flow_alive_when_no_steps=None, kerberos_attributes=None, log_uri=None, master_instance_group=None, master_instance_type=None, master_public_dns=None, name=None, release_label=None, scale_down_behavior=None, security_configuration=None, service_role=None, step_concurrency_level=None, steps=None, tags=None, termination_protection=None, visible_to_all_users=None, __props__=None);
func GetCluster(ctx *Context, name string, id IDInput, state *ClusterState, opts ...ResourceOption) (*Cluster, error)
public static Cluster Get(string name, Input<string> id, ClusterState? state, CustomResourceOptions? opts = null)
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- resource_name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
The following state arguments are supported:
- Additional
Info string A JSON string for selecting additional features such as adding proxy information. Note: Currently there is no API to retrieve the value of this argument after EMR cluster creation from provider, therefore this provider cannot detect drift from the actual EMR cluster if its value is changed outside this provider.
- Applications List<string>
A list of applications for the cluster. Valid values are: Flink, Hadoop, Hive, Mahout, Pig, Spark, and JupyterHub (as of EMR 5.14.0). Case insensitive.
- Arn string
- Autoscaling
Role string An IAM role for automatic scaling policies. The IAM role provides permissions that the automatic scaling feature requires to launch and terminate EC2 instances in an instance group.
- Bootstrap
Actions List<ClusterBootstrap Action Args> Ordered list of bootstrap actions that will be run before Hadoop is started on the cluster nodes. Defined below.
- Configurations string
List of configurations supplied for the EMR cluster you are creating
- Configurations
Json string A JSON string for supplying list of configurations for the EMR cluster.
- Core
Instance intCount Use the
core_instance_groupconfiguration blockinstance_countargument instead. Number of Amazon EC2 instances used to execute the job flow. EMR will use one node as the cluster’s master node and use the remainder of the nodes (core_instance_count-1) as core nodes. Cannot be specified ifcore_instance_grouporinstance_groupconfiguration blocks are set. Default1- Core
Instance ClusterGroup Core Instance Group Args Configuration block to use an Instance Group for the core node type. Cannot be specified if
core_instance_countargument,core_instance_typeargument, orinstance_groupconfiguration blocks are set. Detailed below.- Core
Instance stringType Use the
core_instance_groupconfiguration blockinstance_typeargument instead. The EC2 instance type of the slave nodes. Cannot be specified ifcore_instance_grouporinstance_groupconfiguration blocks are set.- Custom
Ami stringId A custom Amazon Linux AMI for the cluster (instead of an EMR-owned AMI). Available in Amazon EMR version 5.7.0 and later.
- Ebs
Root intVolume Size Size in GiB of the EBS root device volume of the Linux AMI that is used for each EC2 instance. Available in Amazon EMR version 4.x and later.
- Ec2Attributes
Cluster
Ec2Attributes Args Attributes for the EC2 instances running the job flow. Defined below
- Instance
Groups List<ClusterInstance Group Args> Use the
master_instance_groupconfiguration block,core_instance_groupconfiguration block andaws.emr.InstanceGroupresource(s) instead. A list ofinstance_groupobjects for each instance group in the cluster. Exactly one ofmaster_instance_typeandinstance_groupmust be specified. Ifinstance_groupis set, then it must contain a configuration block for at least theMASTERinstance group type (as well as any additional instance groups). Cannot be specified ifmaster_instance_grouporcore_instance_groupconfiguration blocks are set. Defined below- Keep
Job boolFlow Alive When No Steps Switch on/off run cluster with no steps or when all steps are complete (default is on)
- Kerberos
Attributes ClusterKerberos Attributes Args Kerberos configuration for the cluster. Defined below
- Log
Uri string S3 bucket to write the log files of the job flow. If a value is not provided, logs are not created
- MasterInstanceGroup ClusterMasterInstanceGroupArgs
Configuration block to use an Instance Group for the master node type. Cannot be specified if master_instance_type argument or instance_group configuration blocks are set. Detailed below.
- MasterInstanceType string
Use the master_instance_group configuration block instance_type argument instead. The EC2 instance type of the master node. Cannot be specified if master_instance_group or instance_group configuration blocks are set.
- MasterPublicDns string
The public DNS name of the master EC2 instance.
- core_instance_group.0.id
Core node type Instance Group ID, if using Instance Group for this node type.
- Name string
The name of the job flow.
- Release
Label string The release label for the Amazon EMR release
- Scale
Down stringBehavior The way that individual Amazon EC2 instances terminate when an automatic scale-in activity occurs or an
instance groupis resized.- Security
Configuration string The security configuration name to attach to the EMR cluster. Only valid for EMR clusters with
release_label4.8.0 or greater- Service
Role string IAM role that will be assumed by the Amazon EMR service to access AWS resources
- State string
- Step
Concurrency intLevel The number of steps that can be executed concurrently. You can specify a maximum of 256 steps. Only valid for EMR clusters with
release_label5.28.0 or greater. (default is 1)- Steps
List<Cluster
Step Args> List of steps to run when creating the cluster. Defined below. It is highly recommended to utilize
ignoreChangesif other steps are being managed outside of this provider.- Dictionary<string, string>
list of tags to apply to the EMR Cluster
- Termination
Protection bool Switch on/off termination protection (default is
false, except when using multiple master nodes). Before attempting to destroy the resource when termination protection is enabled, this configuration must be applied with its value set tofalse.- Visible
To boolAll Users Whether the job flow is visible to all IAM users of the AWS account associated with the job flow. Default
true
- Additional
Info string A JSON string for selecting additional features such as adding proxy information. Note: Currently there is no API to retrieve the value of this argument after EMR cluster creation from provider, therefore this provider cannot detect drift from the actual EMR cluster if its value is changed outside this provider.
- Applications []string
A list of applications for the cluster. Valid values are: Flink, Hadoop, Hive, Mahout, Pig, Spark, and JupyterHub (as of EMR 5.14.0). Case insensitive.
- Arn string
- Autoscaling
Role string An IAM role for automatic scaling policies. The IAM role provides permissions that the automatic scaling feature requires to launch and terminate EC2 instances in an instance group.
- Bootstrap
Actions []ClusterBootstrap Action Ordered list of bootstrap actions that will be run before Hadoop is started on the cluster nodes. Defined below.
- Cluster
State string - Configurations string
List of configurations supplied for the EMR cluster you are creating
- Configurations
Json string A JSON string for supplying list of configurations for the EMR cluster.
- Core
Instance intCount Use the
core_instance_groupconfiguration blockinstance_countargument instead. Number of Amazon EC2 instances used to execute the job flow. EMR will use one node as the cluster’s master node and use the remainder of the nodes (core_instance_count-1) as core nodes. Cannot be specified ifcore_instance_grouporinstance_groupconfiguration blocks are set. Default1- Core
Instance ClusterGroup Core Instance Group Configuration block to use an Instance Group for the core node type. Cannot be specified if
core_instance_countargument,core_instance_typeargument, orinstance_groupconfiguration blocks are set. Detailed below.- Core
Instance stringType Use the
core_instance_groupconfiguration blockinstance_typeargument instead. The EC2 instance type of the slave nodes. Cannot be specified ifcore_instance_grouporinstance_groupconfiguration blocks are set.- Custom
Ami stringId A custom Amazon Linux AMI for the cluster (instead of an EMR-owned AMI). Available in Amazon EMR version 5.7.0 and later.
- Ebs
Root intVolume Size Size in GiB of the EBS root device volume of the Linux AMI that is used for each EC2 instance. Available in Amazon EMR version 4.x and later.
- Ec2Attributes
Cluster
Ec2Attributes Attributes for the EC2 instances running the job flow. Defined below
- Instance
Groups []ClusterInstance Group Use the
master_instance_groupconfiguration block,core_instance_groupconfiguration block andaws.emr.InstanceGroupresource(s) instead. A list ofinstance_groupobjects for each instance group in the cluster. Exactly one ofmaster_instance_typeandinstance_groupmust be specified. Ifinstance_groupis set, then it must contain a configuration block for at least theMASTERinstance group type (as well as any additional instance groups). Cannot be specified ifmaster_instance_grouporcore_instance_groupconfiguration blocks are set. Defined below- Keep
Job boolFlow Alive When No Steps Switch on/off run cluster with no steps or when all steps are complete (default is on)
- Kerberos
Attributes ClusterKerberos Attributes Kerberos configuration for the cluster. Defined below
- Log
Uri string S3 bucket to write the log files of the job flow. If a value is not provided, logs are not created
- MasterInstanceGroup ClusterMasterInstanceGroup
Configuration block to use an Instance Group for the master node type. Cannot be specified if master_instance_type argument or instance_group configuration blocks are set. Detailed below.
- MasterInstanceType string
Use the master_instance_group configuration block instance_type argument instead. The EC2 instance type of the master node. Cannot be specified if master_instance_group or instance_group configuration blocks are set.
- MasterPublicDns string
The public DNS name of the master EC2 instance.
- core_instance_group.0.id
Core node type Instance Group ID, if using Instance Group for this node type.
- Name string
The name of the job flow.
- Release
Label string The release label for the Amazon EMR release
- Scale
Down stringBehavior The way that individual Amazon EC2 instances terminate when an automatic scale-in activity occurs or an
instance groupis resized.- Security
Configuration string The security configuration name to attach to the EMR cluster. Only valid for EMR clusters with
release_label4.8.0 or greater- Service
Role string IAM role that will be assumed by the Amazon EMR service to access AWS resources
- Step
Concurrency intLevel The number of steps that can be executed concurrently. You can specify a maximum of 256 steps. Only valid for EMR clusters with
release_label5.28.0 or greater. (default is 1)- Steps
[]Cluster
Step List of steps to run when creating the cluster. Defined below. It is highly recommended to utilize
ignoreChangesif other steps are being managed outside of this provider.- map[string]string
list of tags to apply to the EMR Cluster
- Termination
Protection bool Switch on/off termination protection (default is
false, except when using multiple master nodes). Before attempting to destroy the resource when termination protection is enabled, this configuration must be applied with its value set tofalse.- Visible
To boolAll Users Whether the job flow is visible to all IAM users of the AWS account associated with the job flow. Default
true
- additional
Info string A JSON string for selecting additional features such as adding proxy information. Note: Currently there is no API to retrieve the value of this argument after EMR cluster creation from provider, therefore this provider cannot detect drift from the actual EMR cluster if its value is changed outside this provider.
- applications string[]
A list of applications for the cluster. Valid values are: Flink, Hadoop, Hive, Mahout, Pig, Spark, and JupyterHub (as of EMR 5.14.0). Case insensitive.
- arn string
- autoscaling
Role string An IAM role for automatic scaling policies. The IAM role provides permissions that the automatic scaling feature requires to launch and terminate EC2 instances in an instance group.
- bootstrap
Actions ClusterBootstrap Action[] Ordered list of bootstrap actions that will be run before Hadoop is started on the cluster nodes. Defined below.
- cluster
State string - configurations string
List of configurations supplied for the EMR cluster you are creating
- configurations
Json string A JSON string for supplying list of configurations for the EMR cluster.
- core
Instance numberCount Use the
core_instance_groupconfiguration blockinstance_countargument instead. Number of Amazon EC2 instances used to execute the job flow. EMR will use one node as the cluster’s master node and use the remainder of the nodes (core_instance_count-1) as core nodes. Cannot be specified ifcore_instance_grouporinstance_groupconfiguration blocks are set. Default1- core
Instance ClusterGroup Core Instance Group Configuration block to use an Instance Group for the core node type. Cannot be specified if
core_instance_countargument,core_instance_typeargument, orinstance_groupconfiguration blocks are set. Detailed below.- core
Instance stringType Use the
core_instance_groupconfiguration blockinstance_typeargument instead. The EC2 instance type of the slave nodes. Cannot be specified ifcore_instance_grouporinstance_groupconfiguration blocks are set.- custom
Ami stringId A custom Amazon Linux AMI for the cluster (instead of an EMR-owned AMI). Available in Amazon EMR version 5.7.0 and later.
- ebs
Root numberVolume Size Size in GiB of the EBS root device volume of the Linux AMI that is used for each EC2 instance. Available in Amazon EMR version 4.x and later.
- ec2Attributes
Cluster
Ec2Attributes Attributes for the EC2 instances running the job flow. Defined below
- instance
Groups ClusterInstance Group[] Use the
master_instance_groupconfiguration block,core_instance_groupconfiguration block andaws.emr.InstanceGroupresource(s) instead. A list ofinstance_groupobjects for each instance group in the cluster. Exactly one ofmaster_instance_typeandinstance_groupmust be specified. Ifinstance_groupis set, then it must contain a configuration block for at least theMASTERinstance group type (as well as any additional instance groups). Cannot be specified ifmaster_instance_grouporcore_instance_groupconfiguration blocks are set. Defined below- keep
Job booleanFlow Alive When No Steps Switch on/off run cluster with no steps or when all steps are complete (default is on)
- kerberos
Attributes ClusterKerberos Attributes Kerberos configuration for the cluster. Defined below
- log
Uri string S3 bucket to write the log files of the job flow. If a value is not provided, logs are not created
- masterInstanceGroup ClusterMasterInstanceGroup
Configuration block to use an Instance Group for the master node type. Cannot be specified if master_instance_type argument or instance_group configuration blocks are set. Detailed below.
- masterInstanceType string
Use the master_instance_group configuration block instance_type argument instead. The EC2 instance type of the master node. Cannot be specified if master_instance_group or instance_group configuration blocks are set.
- masterPublicDns string
The public DNS name of the master EC2 instance.
- core_instance_group.0.id
Core node type Instance Group ID, if using Instance Group for this node type.
- name string
The name of the job flow.
- release
Label string The release label for the Amazon EMR release
- scale
Down stringBehavior The way that individual Amazon EC2 instances terminate when an automatic scale-in activity occurs or an
instance groupis resized.- security
Configuration string The security configuration name to attach to the EMR cluster. Only valid for EMR clusters with
release_label4.8.0 or greater- service
Role string IAM role that will be assumed by the Amazon EMR service to access AWS resources
- step
Concurrency numberLevel The number of steps that can be executed concurrently. You can specify a maximum of 256 steps. Only valid for EMR clusters with
release_label5.28.0 or greater. (default is 1)- steps
Cluster
Step[] List of steps to run when creating the cluster. Defined below. It is highly recommended to utilize
ignoreChangesif other steps are being managed outside of this provider.- {[key: string]: string}
list of tags to apply to the EMR Cluster
- termination
Protection boolean Switch on/off termination protection (default is
false, except when using multiple master nodes). Before attempting to destroy the resource when termination protection is enabled, this configuration must be applied with its value set tofalse.- visible
To booleanAll Users Whether the job flow is visible to all IAM users of the AWS account associated with the job flow. Default
true
- additional_
info str A JSON string for selecting additional features such as adding proxy information. Note: Currently there is no API to retrieve the value of this argument after EMR cluster creation from provider, therefore this provider cannot detect drift from the actual EMR cluster if its value is changed outside this provider.
- applications List[str]
A list of applications for the cluster. Valid values are: Flink, Hadoop, Hive, Mahout, Pig, Spark, and JupyterHub (as of EMR 5.14.0). Case insensitive.
- arn str
- autoscaling_
role str An IAM role for automatic scaling policies. The IAM role provides permissions that the automatic scaling feature requires to launch and terminate EC2 instances in an instance group.
- bootstrap_
actions List[ClusterBootstrap Action] Ordered list of bootstrap actions that will be run before Hadoop is started on the cluster nodes. Defined below.
- cluster_
state str - configurations str
List of configurations supplied for the EMR cluster you are creating
- configurations_
json str A JSON string for supplying list of configurations for the EMR cluster.
- core_
instance_ floatcount Use the
core_instance_groupconfiguration blockinstance_countargument instead. Number of Amazon EC2 instances used to execute the job flow. EMR will use one node as the cluster’s master node and use the remainder of the nodes (core_instance_count-1) as core nodes. Cannot be specified ifcore_instance_grouporinstance_groupconfiguration blocks are set. Default1- core_
instance_ Dict[Clustergroup Core Instance Group] Configuration block to use an Instance Group for the core node type. Cannot be specified if
core_instance_countargument,core_instance_typeargument, orinstance_groupconfiguration blocks are set. Detailed below.- core_
instance_ strtype Use the
core_instance_groupconfiguration blockinstance_typeargument instead. The EC2 instance type of the slave nodes. Cannot be specified ifcore_instance_grouporinstance_groupconfiguration blocks are set.- custom_
ami_ strid A custom Amazon Linux AMI for the cluster (instead of an EMR-owned AMI). Available in Amazon EMR version 5.7.0 and later.
- ebs_
root_ floatvolume_ size Size in GiB of the EBS root device volume of the Linux AMI that is used for each EC2 instance. Available in Amazon EMR version 4.x and later.
- ec2_
attributes Dict[ClusterEc2Attributes] Attributes for the EC2 instances running the job flow. Defined below
- instance_
groups List[ClusterInstance Group] Use the
master_instance_groupconfiguration block,core_instance_groupconfiguration block andaws.emr.InstanceGroupresource(s) instead. A list ofinstance_groupobjects for each instance group in the cluster. Exactly one ofmaster_instance_typeandinstance_groupmust be specified. Ifinstance_groupis set, then it must contain a configuration block for at least theMASTERinstance group type (as well as any additional instance groups). Cannot be specified ifmaster_instance_grouporcore_instance_groupconfiguration blocks are set. Defined below- keep_
job_ boolflow_ alive_ when_ no_ steps Switch on/off run cluster with no steps or when all steps are complete (default is on)
- kerberos_
attributes Dict[ClusterKerberos Attributes] Kerberos configuration for the cluster. Defined below
- log_
uri str S3 bucket to write the log files of the job flow. If a value is not provided, logs are not created
- master_instance_group Dict[ClusterMasterInstanceGroup]
Configuration block to use an Instance Group for the master node type. Cannot be specified if master_instance_type argument or instance_group configuration blocks are set. Detailed below.
- master_instance_type str
Use the master_instance_group configuration block instance_type argument instead. The EC2 instance type of the master node. Cannot be specified if master_instance_group or instance_group configuration blocks are set.
- master_public_dns str
The public DNS name of the master EC2 instance.
- core_instance_group.0.id
Core node type Instance Group ID, if using Instance Group for this node type.
- name str
The name of the job flow.
- release_
label str The release label for the Amazon EMR release
- scale_
down_ strbehavior The way that individual Amazon EC2 instances terminate when an automatic scale-in activity occurs or an
instance groupis resized.- security_
configuration str The security configuration name to attach to the EMR cluster. Only valid for EMR clusters with
release_label4.8.0 or greater- service_
role str IAM role that will be assumed by the Amazon EMR service to access AWS resources
- step_
concurrency_ floatlevel The number of steps that can be executed concurrently. You can specify a maximum of 256 steps. Only valid for EMR clusters with
release_label5.28.0 or greater. (default is 1)- steps
List[Cluster
Step] List of steps to run when creating the cluster. Defined below. It is highly recommended to utilize
ignoreChangesif other steps are being managed outside of this provider.- Dict[str, str]
list of tags to apply to the EMR Cluster
- termination_
protection bool Switch on/off termination protection (default is
false, except when using multiple master nodes). Before attempting to destroy the resource when termination protection is enabled, this configuration must be applied with its value set tofalse.- visible_
to_ boolall_ users Whether the job flow is visible to all IAM users of the AWS account associated with the job flow. Default
true
Supporting Types
ClusterBootstrapAction
ClusterCoreInstanceGroup
- Instance
Type string EC2 instance type for all instances in the instance group.
- Autoscaling
Policy string The autoscaling policy document. This is a JSON formatted string. See EMR Auto Scaling
- Bid
Price string Bid price for each EC2 instance in the instance group, expressed in USD. By setting this attribute, the instance group is being declared as a Spot Instance, and will implicitly create a Spot request. Leave this blank to use On-Demand Instances.
- Ebs
Configs List<ClusterCore Instance Group Ebs Config Args> Configuration block(s) for EBS volumes attached to each instance in the instance group. Detailed below.
- Id string
The ID of the EMR Cluster
- InstanceCount int
Target number of instances for the instance group. Must be 1 or 3. Defaults to 1. Launching with multiple master nodes is only supported in EMR version 5.23.0+, and requires this resource’s core_instance_group to be configured. Public (Internet accessible) instances must be created in VPC subnets that have map public IP on launch enabled. Termination protection is automatically enabled when launched with multiple master nodes and this provider must have the termination_protection = false configuration applied before destroying this resource.
- Name string
Friendly name given to the instance group.
- Instance
Type string EC2 instance type for all instances in the instance group.
- Autoscaling
Policy string The autoscaling policy document. This is a JSON formatted string. See EMR Auto Scaling
- Bid
Price string Bid price for each EC2 instance in the instance group, expressed in USD. By setting this attribute, the instance group is being declared as a Spot Instance, and will implicitly create a Spot request. Leave this blank to use On-Demand Instances.
- Ebs
Configs []ClusterCore Instance Group Ebs Config Configuration block(s) for EBS volumes attached to each instance in the instance group. Detailed below.
- Id string
The ID of the EMR Cluster
- InstanceCount int
Target number of instances for the instance group. Must be 1 or 3. Defaults to 1. Launching with multiple master nodes is only supported in EMR version 5.23.0+, and requires this resource’s core_instance_group to be configured. Public (Internet accessible) instances must be created in VPC subnets that have map public IP on launch enabled. Termination protection is automatically enabled when launched with multiple master nodes and this provider must have the termination_protection = false configuration applied before destroying this resource.
- Name string
Friendly name given to the instance group.
- instance
Type string EC2 instance type for all instances in the instance group.
- autoscaling
Policy string The autoscaling policy document. This is a JSON formatted string. See EMR Auto Scaling
- bid
Price string Bid price for each EC2 instance in the instance group, expressed in USD. By setting this attribute, the instance group is being declared as a Spot Instance, and will implicitly create a Spot request. Leave this blank to use On-Demand Instances.
- ebs
Configs ClusterCore Instance Group Ebs Config[] Configuration block(s) for EBS volumes attached to each instance in the instance group. Detailed below.
- id string
The ID of the EMR Cluster
- instanceCount number
Target number of instances for the instance group. Must be 1 or 3. Defaults to 1. Launching with multiple master nodes is only supported in EMR version 5.23.0+, and requires this resource’s core_instance_group to be configured. Public (Internet accessible) instances must be created in VPC subnets that have map public IP on launch enabled. Termination protection is automatically enabled when launched with multiple master nodes and this provider must have the termination_protection = false configuration applied before destroying this resource.
- name string
Friendly name given to the instance group.
- instance_
type str EC2 instance type for all instances in the instance group.
- autoscaling_
policy str The autoscaling policy document. This is a JSON formatted string. See EMR Auto Scaling
- bid_
price str Bid price for each EC2 instance in the instance group, expressed in USD. By setting this attribute, the instance group is being declared as a Spot Instance, and will implicitly create a Spot request. Leave this blank to use On-Demand Instances.
- ebs_
configs List[ClusterCore Instance Group Ebs Config] Configuration block(s) for EBS volumes attached to each instance in the instance group. Detailed below.
- id str
The ID of the EMR Cluster
- instance_
count float Target number of instances for the instance group. Must be 1 or 3. Defaults to 1. Launching with multiple master nodes is only supported in EMR version 5.23.0+, and requires this resource’s
core_instance_group to be configured. Public (Internet accessible) instances must be created in VPC subnets that have map public IP on launch enabled. Termination protection is automatically enabled when launched with multiple master nodes and this provider must have the termination_protection = false configuration applied before destroying this resource.
- name str
Friendly name given to the instance group.
ClusterCoreInstanceGroupEbsConfig
- Size int
The volume size, in gibibytes (GiB).
- Type string
The volume type. Valid options are
gp2, io1, standard and st1. See EBS Volume Types.
- Iops int
The number of I/O operations per second (IOPS) that the volume supports.
- Volumes
Per Instance int The number of EBS volumes with this configuration to attach to each EC2 instance in the instance group (default is 1).
- Size int
The volume size, in gibibytes (GiB).
- Type string
The volume type. Valid options are
gp2, io1, standard and st1. See EBS Volume Types.
- Iops int
The number of I/O operations per second (IOPS) that the volume supports.
- Volumes
Per Instance int The number of EBS volumes with this configuration to attach to each EC2 instance in the instance group (default is 1).
- size number
The volume size, in gibibytes (GiB).
- type string
The volume type. Valid options are
gp2, io1, standard and st1. See EBS Volume Types.
- iops number
The number of I/O operations per second (IOPS) that the volume supports.
- volumes
Per Instance number The number of EBS volumes with this configuration to attach to each EC2 instance in the instance group (default is 1).
- size float
The volume size, in gibibytes (GiB).
- type str
The volume type. Valid options are
gp2, io1, standard and st1. See EBS Volume Types.
- iops float
The number of I/O operations per second (IOPS) that the volume supports.
- volumes
Per Instance float The number of EBS volumes with this configuration to attach to each EC2 instance in the instance group (default is 1).
ClusterEc2Attributes
- Instance
Profile string Instance Profile for EC2 instances of the cluster to assume this role
- Additional
Master Security Groups string String containing a comma separated list of additional Amazon EC2 security group IDs for the master node
- Additional
Slave Security Groups string String containing a comma separated list of additional Amazon EC2 security group IDs for the slave nodes
- Emr
Managed Master Security Group string Identifier of the Amazon EC2 EMR-Managed security group for the master node
- Emr
Managed Slave Security Group string Identifier of the Amazon EC2 EMR-Managed security group for the slave nodes
- Key
Name string Amazon EC2 key pair that can be used to ssh to the master node as the user called
hadoop
- Service
Access Security Group string Identifier of the Amazon EC2 service-access security group - required when the cluster runs on a private subnet
- Subnet
Id string VPC subnet id where you want the job flow to launch. Cannot specify the
cc1.4xlarge instance type for nodes of a job flow launched in an Amazon VPC
- Instance
Profile string Instance Profile for EC2 instances of the cluster to assume this role
- Additional
Master Security Groups string String containing a comma separated list of additional Amazon EC2 security group IDs for the master node
- Additional
Slave Security Groups string String containing a comma separated list of additional Amazon EC2 security group IDs for the slave nodes
- Emr
Managed Master Security Group string Identifier of the Amazon EC2 EMR-Managed security group for the master node
- Emr
Managed Slave Security Group string Identifier of the Amazon EC2 EMR-Managed security group for the slave nodes
- Key
Name string Amazon EC2 key pair that can be used to ssh to the master node as the user called
hadoop
- Service
Access Security Group string Identifier of the Amazon EC2 service-access security group - required when the cluster runs on a private subnet
- Subnet
Id string VPC subnet id where you want the job flow to launch. Cannot specify the
cc1.4xlarge instance type for nodes of a job flow launched in an Amazon VPC
- instance
Profile string Instance Profile for EC2 instances of the cluster to assume this role
- additional
Master Security Groups string String containing a comma separated list of additional Amazon EC2 security group IDs for the master node
- additional
Slave Security Groups string String containing a comma separated list of additional Amazon EC2 security group IDs for the slave nodes
- emr
Managed Master Security Group string Identifier of the Amazon EC2 EMR-Managed security group for the master node
- emr
Managed Slave Security Group string Identifier of the Amazon EC2 EMR-Managed security group for the slave nodes
- key
Name string Amazon EC2 key pair that can be used to ssh to the master node as the user called
hadoop
- service
Access Security Group string Identifier of the Amazon EC2 service-access security group - required when the cluster runs on a private subnet
- subnet
Id string VPC subnet id where you want the job flow to launch. Cannot specify the
cc1.4xlarge instance type for nodes of a job flow launched in an Amazon VPC
- instance
Profile str Instance Profile for EC2 instances of the cluster to assume this role
- additional
Master Security Groups str String containing a comma separated list of additional Amazon EC2 security group IDs for the master node
- additional
Slave Security Groups str String containing a comma separated list of additional Amazon EC2 security group IDs for the slave nodes
- emr
Managed Master Security Group str Identifier of the Amazon EC2 EMR-Managed security group for the master node
- emr
Managed Slave Security Group str Identifier of the Amazon EC2 EMR-Managed security group for the slave nodes
- key_
name str Amazon EC2 key pair that can be used to ssh to the master node as the user called
hadoop
- service
Access Security Group str Identifier of the Amazon EC2 service-access security group - required when the cluster runs on a private subnet
- subnet_
id str VPC subnet id where you want the job flow to launch. Cannot specify the
cc1.4xlarge instance type for nodes of a job flow launched in an Amazon VPC
ClusterInstanceGroup
- Instance
Role string The role of the instance group in the cluster. Valid values are:
MASTER, CORE, and TASK.
- Instance
Type string EC2 instance type for all instances in the instance group.
- Autoscaling
Policy string The autoscaling policy document. This is a JSON formatted string. See EMR Auto Scaling
- Bid
Price string Bid price for each EC2 instance in the instance group, expressed in USD. By setting this attribute, the instance group is being declared as a Spot Instance, and will implicitly create a Spot request. Leave this blank to use On-Demand Instances.
- Ebs
Configs List<ClusterInstance Group Ebs Config Args> Configuration block(s) for EBS volumes attached to each instance in the instance group. Detailed below.
- Id string
The ID of the EMR Cluster
- Instance
Count int Target number of instances for the instance group. Must be 1 or 3. Defaults to 1. Launching with multiple master nodes is only supported in EMR version 5.23.0+, and requires this resource’s
core_instance_group to be configured. Public (Internet accessible) instances must be created in VPC subnets that have map public IP on launch enabled. Termination protection is automatically enabled when launched with multiple master nodes and this provider must have the termination_protection = false configuration applied before destroying this resource.
- Name string
Friendly name given to the instance group.
- Instance
Role string The role of the instance group in the cluster. Valid values are:
MASTER, CORE, and TASK.
- Instance
Type string EC2 instance type for all instances in the instance group.
- Autoscaling
Policy string The autoscaling policy document. This is a JSON formatted string. See EMR Auto Scaling
- Bid
Price string Bid price for each EC2 instance in the instance group, expressed in USD. By setting this attribute, the instance group is being declared as a Spot Instance, and will implicitly create a Spot request. Leave this blank to use On-Demand Instances.
- Ebs
Configs []ClusterInstance Group Ebs Config Configuration block(s) for EBS volumes attached to each instance in the instance group. Detailed below.
- Id string
The ID of the EMR Cluster
- Instance
Count int Target number of instances for the instance group. Must be 1 or 3. Defaults to 1. Launching with multiple master nodes is only supported in EMR version 5.23.0+, and requires this resource’s
core_instance_group to be configured. Public (Internet accessible) instances must be created in VPC subnets that have map public IP on launch enabled. Termination protection is automatically enabled when launched with multiple master nodes and this provider must have the termination_protection = false configuration applied before destroying this resource.
- Name string
Friendly name given to the instance group.
- instance
Role string The role of the instance group in the cluster. Valid values are:
MASTER, CORE, and TASK.
- instance
Type string EC2 instance type for all instances in the instance group.
- autoscaling
Policy string The autoscaling policy document. This is a JSON formatted string. See EMR Auto Scaling
- bid
Price string Bid price for each EC2 instance in the instance group, expressed in USD. By setting this attribute, the instance group is being declared as a Spot Instance, and will implicitly create a Spot request. Leave this blank to use On-Demand Instances.
- ebs
Configs ClusterInstance Group Ebs Config[] Configuration block(s) for EBS volumes attached to each instance in the instance group. Detailed below.
- id string
The ID of the EMR Cluster
- instance
Count number Target number of instances for the instance group. Must be 1 or 3. Defaults to 1. Launching with multiple master nodes is only supported in EMR version 5.23.0+, and requires this resource’s
core_instance_group to be configured. Public (Internet accessible) instances must be created in VPC subnets that have map public IP on launch enabled. Termination protection is automatically enabled when launched with multiple master nodes and this provider must have the termination_protection = false configuration applied before destroying this resource.
- name string
Friendly name given to the instance group.
- instance
Role str The role of the instance group in the cluster. Valid values are:
MASTER, CORE, and TASK.
- instance_
type str EC2 instance type for all instances in the instance group.
- autoscaling_
policy str The autoscaling policy document. This is a JSON formatted string. See EMR Auto Scaling
- bid_
price str Bid price for each EC2 instance in the instance group, expressed in USD. By setting this attribute, the instance group is being declared as a Spot Instance, and will implicitly create a Spot request. Leave this blank to use On-Demand Instances.
- ebs_
configs List[ClusterInstance Group Ebs Config] Configuration block(s) for EBS volumes attached to each instance in the instance group. Detailed below.
- id str
The ID of the EMR Cluster
- instance_
count float Target number of instances for the instance group. Must be 1 or 3. Defaults to 1. Launching with multiple master nodes is only supported in EMR version 5.23.0+, and requires this resource’s
core_instance_group to be configured. Public (Internet accessible) instances must be created in VPC subnets that have map public IP on launch enabled. Termination protection is automatically enabled when launched with multiple master nodes and this provider must have the termination_protection = false configuration applied before destroying this resource.
- name str
Friendly name given to the instance group.
ClusterInstanceGroupEbsConfig
- Size int
The volume size, in gibibytes (GiB).
- Type string
The volume type. Valid options are
gp2, io1, standard and st1. See EBS Volume Types.
- Iops int
The number of I/O operations per second (IOPS) that the volume supports.
- Volumes
Per Instance int The number of EBS volumes with this configuration to attach to each EC2 instance in the instance group (default is 1).
- Size int
The volume size, in gibibytes (GiB).
- Type string
The volume type. Valid options are
gp2, io1, standard and st1. See EBS Volume Types.
- Iops int
The number of I/O operations per second (IOPS) that the volume supports.
- Volumes
Per Instance int The number of EBS volumes with this configuration to attach to each EC2 instance in the instance group (default is 1).
- size number
The volume size, in gibibytes (GiB).
- type string
The volume type. Valid options are
gp2, io1, standard and st1. See EBS Volume Types.
- iops number
The number of I/O operations per second (IOPS) that the volume supports.
- volumes
Per Instance number The number of EBS volumes with this configuration to attach to each EC2 instance in the instance group (default is 1).
- size float
The volume size, in gibibytes (GiB).
- type str
The volume type. Valid options are
gp2, io1, standard and st1. See EBS Volume Types.
- iops float
The number of I/O operations per second (IOPS) that the volume supports.
- volumes
Per Instance float The number of EBS volumes with this configuration to attach to each EC2 instance in the instance group (default is 1).
ClusterKerberosAttributes
- Kdc
Admin Password string The password used within the cluster for the kadmin service on the cluster-dedicated KDC, which maintains Kerberos principals, password policies, and keytabs for the cluster. This provider cannot perform drift detection of this configuration.
- Realm string
The name of the Kerberos realm to which all nodes in a cluster belong. For example,
EC2.INTERNAL
- Ad
Domain Join Password string The Active Directory password for
ad_domain_join_user. This provider cannot perform drift detection of this configuration.
- Ad
Domain Join User string Required only when establishing a cross-realm trust with an Active Directory domain. A user with sufficient privileges to join resources to the domain. This provider cannot perform drift detection of this configuration.
- Cross
Realm Trust Principal Password string Required only when establishing a cross-realm trust with a KDC in a different realm. The cross-realm principal password, which must be identical across realms. This provider cannot perform drift detection of this configuration.
- Kdc
Admin Password string The password used within the cluster for the kadmin service on the cluster-dedicated KDC, which maintains Kerberos principals, password policies, and keytabs for the cluster. This provider cannot perform drift detection of this configuration.
- Realm string
The name of the Kerberos realm to which all nodes in a cluster belong. For example,
EC2.INTERNAL
- Ad
Domain Join Password string The Active Directory password for
ad_domain_join_user. This provider cannot perform drift detection of this configuration.
- Ad
Domain Join User string Required only when establishing a cross-realm trust with an Active Directory domain. A user with sufficient privileges to join resources to the domain. This provider cannot perform drift detection of this configuration.
- Cross
Realm Trust Principal Password string Required only when establishing a cross-realm trust with a KDC in a different realm. The cross-realm principal password, which must be identical across realms. This provider cannot perform drift detection of this configuration.
- kdc
Admin Password string The password used within the cluster for the kadmin service on the cluster-dedicated KDC, which maintains Kerberos principals, password policies, and keytabs for the cluster. This provider cannot perform drift detection of this configuration.
- realm string
The name of the Kerberos realm to which all nodes in a cluster belong. For example,
EC2.INTERNAL
- ad
Domain Join Password string The Active Directory password for
ad_domain_join_user. This provider cannot perform drift detection of this configuration.
- ad
Domain Join User string Required only when establishing a cross-realm trust with an Active Directory domain. A user with sufficient privileges to join resources to the domain. This provider cannot perform drift detection of this configuration.
- cross
Realm Trust Principal Password string Required only when establishing a cross-realm trust with a KDC in a different realm. The cross-realm principal password, which must be identical across realms. This provider cannot perform drift detection of this configuration.
- kdc
Admin Password str The password used within the cluster for the kadmin service on the cluster-dedicated KDC, which maintains Kerberos principals, password policies, and keytabs for the cluster. This provider cannot perform drift detection of this configuration.
- realm str
The name of the Kerberos realm to which all nodes in a cluster belong. For example,
EC2.INTERNAL
- ad
Domain Join Password str The Active Directory password for
ad_domain_join_user. This provider cannot perform drift detection of this configuration.
- ad
Domain Join User str Required only when establishing a cross-realm trust with an Active Directory domain. A user with sufficient privileges to join resources to the domain. This provider cannot perform drift detection of this configuration.
- cross
Realm Trust Principal Password str Required only when establishing a cross-realm trust with a KDC in a different realm. The cross-realm principal password, which must be identical across realms. This provider cannot perform drift detection of this configuration.
ClusterMasterInstanceGroup
- Instance
Type string EC2 instance type for all instances in the instance group.
- Bid
Price string Bid price for each EC2 instance in the instance group, expressed in USD. By setting this attribute, the instance group is being declared as a Spot Instance, and will implicitly create a Spot request. Leave this blank to use On-Demand Instances.
- Ebs
Configs List<ClusterMaster Instance Group Ebs Config Args> Configuration block(s) for EBS volumes attached to each instance in the instance group. Detailed below.
- Id string
The ID of the EMR Cluster
- Instance
Count int Target number of instances for the instance group. Must be 1 or 3. Defaults to 1. Launching with multiple master nodes is only supported in EMR version 5.23.0+, and requires this resource’s
core_instance_group to be configured. Public (Internet accessible) instances must be created in VPC subnets that have map public IP on launch enabled. Termination protection is automatically enabled when launched with multiple master nodes and this provider must have the termination_protection = false configuration applied before destroying this resource.
- Name string
Friendly name given to the instance group.
- Instance
Type string EC2 instance type for all instances in the instance group.
- Bid
Price string Bid price for each EC2 instance in the instance group, expressed in USD. By setting this attribute, the instance group is being declared as a Spot Instance, and will implicitly create a Spot request. Leave this blank to use On-Demand Instances.
- Ebs
Configs []ClusterMaster Instance Group Ebs Config Configuration block(s) for EBS volumes attached to each instance in the instance group. Detailed below.
- Id string
The ID of the EMR Cluster
- Instance
Count int Target number of instances for the instance group. Must be 1 or 3. Defaults to 1. Launching with multiple master nodes is only supported in EMR version 5.23.0+, and requires this resource’s
core_instance_group to be configured. Public (Internet accessible) instances must be created in VPC subnets that have map public IP on launch enabled. Termination protection is automatically enabled when launched with multiple master nodes and this provider must have the termination_protection = false configuration applied before destroying this resource.
- Name string
Friendly name given to the instance group.
- instance
Type string EC2 instance type for all instances in the instance group.
- bid
Price string Bid price for each EC2 instance in the instance group, expressed in USD. By setting this attribute, the instance group is being declared as a Spot Instance, and will implicitly create a Spot request. Leave this blank to use On-Demand Instances.
- ebs
Configs ClusterMaster Instance Group Ebs Config[] Configuration block(s) for EBS volumes attached to each instance in the instance group. Detailed below.
- id string
The ID of the EMR Cluster
- instance
Count number Target number of instances for the instance group. Must be 1 or 3. Defaults to 1. Launching with multiple master nodes is only supported in EMR version 5.23.0+, and requires this resource’s
core_instance_group to be configured. Public (Internet accessible) instances must be created in VPC subnets that have map public IP on launch enabled. Termination protection is automatically enabled when launched with multiple master nodes and this provider must have the termination_protection = false configuration applied before destroying this resource.
- name string
Friendly name given to the instance group.
- instance_
type str EC2 instance type for all instances in the instance group.
- bid_
price str Bid price for each EC2 instance in the instance group, expressed in USD. By setting this attribute, the instance group is being declared as a Spot Instance, and will implicitly create a Spot request. Leave this blank to use On-Demand Instances.
- ebs_
configs List[ClusterMaster Instance Group Ebs Config] Configuration block(s) for EBS volumes attached to each instance in the instance group. Detailed below.
- id str
The ID of the EMR Cluster
- instance_
count float Target number of instances for the instance group. Must be 1 or 3. Defaults to 1. Launching with multiple master nodes is only supported in EMR version 5.23.0+, and requires this resource’s
core_instance_group to be configured. Public (Internet accessible) instances must be created in VPC subnets that have map public IP on launch enabled. Termination protection is automatically enabled when launched with multiple master nodes and this provider must have the termination_protection = false configuration applied before destroying this resource.
- name str
Friendly name given to the instance group.
ClusterMasterInstanceGroupEbsConfig
- Size int
The volume size, in gibibytes (GiB).
- Type string
The volume type. Valid options are
gp2, io1, standard and st1. See EBS Volume Types.
- Iops int
The number of I/O operations per second (IOPS) that the volume supports.
- Volumes
Per Instance int The number of EBS volumes with this configuration to attach to each EC2 instance in the instance group (default is 1).
- Size int
The volume size, in gibibytes (GiB).
- Type string
The volume type. Valid options are
gp2, io1, standard and st1. See EBS Volume Types.
- Iops int
The number of I/O operations per second (IOPS) that the volume supports.
- Volumes
Per Instance int The number of EBS volumes with this configuration to attach to each EC2 instance in the instance group (default is 1).
- size number
The volume size, in gibibytes (GiB).
- type string
The volume type. Valid options are
gp2, io1, standard and st1. See EBS Volume Types.
- iops number
The number of I/O operations per second (IOPS) that the volume supports.
- volumes
Per Instance number The number of EBS volumes with this configuration to attach to each EC2 instance in the instance group (default is 1).
- size float
The volume size, in gibibytes (GiB).
- type str
The volume type. Valid options are
gp2, io1, standard and st1. See EBS Volume Types.
- iops float
The number of I/O operations per second (IOPS) that the volume supports.
- volumes
Per Instance float The number of EBS volumes with this configuration to attach to each EC2 instance in the instance group (default is 1).
ClusterStep
- Action
On Failure string The action to take if the step fails. Valid values:
TERMINATE_JOB_FLOW, TERMINATE_CLUSTER, CANCEL_AND_WAIT, and CONTINUE
- Hadoop
Jar Step ClusterStep Hadoop Jar Step Args The JAR file used for the step. Defined below.
- Name string
The name of the step.
- Action
On Failure string The action to take if the step fails. Valid values:
TERMINATE_JOB_FLOW, TERMINATE_CLUSTER, CANCEL_AND_WAIT, and CONTINUE
- Hadoop
Jar Step ClusterStep Hadoop Jar Step The JAR file used for the step. Defined below.
- Name string
The name of the step.
- action
On Failure string The action to take if the step fails. Valid values:
TERMINATE_JOB_FLOW, TERMINATE_CLUSTER, CANCEL_AND_WAIT, and CONTINUE
- hadoop
Jar Step ClusterStep Hadoop Jar Step The JAR file used for the step. Defined below.
- name string
The name of the step.
- action
On Failure str The action to take if the step fails. Valid values:
TERMINATE_JOB_FLOW, TERMINATE_CLUSTER, CANCEL_AND_WAIT, and CONTINUE
- hadoop
Jar Step Dict[ClusterStep Hadoop Jar Step] The JAR file used for the step. Defined below.
- name str
The name of the step.
ClusterStepHadoopJarStep
- Jar string
Path to a JAR file run during the step.
- Args List<string>
List of command line arguments passed to the JAR file’s main function when executed.
- Main
Class string Name of the main class in the specified Java file. If not specified, the JAR file should specify a Main-Class in its manifest file.
- Properties Dictionary<string, string>
Key-Value map of Java properties that are set when the step runs. You can use these properties to pass key value pairs to your main function.
- Jar string
Path to a JAR file run during the step.
- Args []string
List of command line arguments passed to the JAR file’s main function when executed.
- Main
Class string Name of the main class in the specified Java file. If not specified, the JAR file should specify a Main-Class in its manifest file.
- Properties map[string]string
Key-Value map of Java properties that are set when the step runs. You can use these properties to pass key value pairs to your main function.
- jar string
Path to a JAR file run during the step.
- args string[]
List of command line arguments passed to the JAR file’s main function when executed.
- main
Class string Name of the main class in the specified Java file. If not specified, the JAR file should specify a Main-Class in its manifest file.
- properties {[key: string]: string}
Key-Value map of Java properties that are set when the step runs. You can use these properties to pass key value pairs to your main function.
- jar str
Path to a JAR file run during the step.
- args List[str]
List of command line arguments passed to the JAR file’s main function when executed.
- main
Class str Name of the main class in the specified Java file. If not specified, the JAR file should specify a Main-Class in its manifest file.
- properties Dict[str, str]
Key-Value map of Java properties that are set when the step runs. You can use these properties to pass key value pairs to your main function.
Package Details
- Repository
- https://github.com/pulumi/pulumi-aws
- License
- Apache-2.0
- Notes
- This Pulumi package is based on the
aws Terraform Provider.