Cluster

Provides an Elastic MapReduce Cluster, a web service that makes it easy to process large amounts of data efficiently. See Amazon Elastic MapReduce Documentation for more information.

To configure Instance Groups for task nodes, see the aws.emr.InstanceGroup resource.

Support for Instance Fleets will be made available in an upcoming release.

Example bootable config

NOTE: This example shows the minimal configuration needed to boot an example EMR Cluster. It is not meant to demonstrate best practices. Use it at your own risk.

import * as pulumi from "@pulumi/pulumi";
import * as aws from "@pulumi/aws";

// Minimal bootable EMR cluster: a VPC with one subnet, the IAM roles EMR needs,
// a security group for the nodes, the cluster itself, internet routing, and the
// inline IAM policies attached to both roles.

// Network: a dedicated VPC (DNS hostnames enabled) and the subnet the cluster runs in.
const mainVpc = new aws.ec2.Vpc("mainVpc", {
    cidrBlock: "168.31.0.0/16",
    enableDnsHostnames: true,
    tags: {
        name: "emr_test",
    },
});
const mainSubnet = new aws.ec2.Subnet("mainSubnet", {
    vpcId: mainVpc.id,
    cidrBlock: "168.31.0.0/20",
    tags: {
        name: "emr_test",
    },
});
// IAM role for EMR Service
const iamEmrServiceRole = new aws.iam.Role("iamEmrServiceRole", {assumeRolePolicy: `{
  "Version": "2008-10-17",
  "Statement": [
    {
      "Sid": "",
      "Effect": "Allow",
      "Principal": {
        "Service": "elasticmapreduce.amazonaws.com"
      },
      "Action": "sts:AssumeRole"
    }
  ]
}
`});
// IAM Role for EC2 Instance Profile
const iamEmrProfileRole = new aws.iam.Role("iamEmrProfileRole", {assumeRolePolicy: `{
  "Version": "2008-10-17",
  "Statement": [
    {
      "Sid": "",
      "Effect": "Allow",
      "Principal": {
        "Service": "ec2.amazonaws.com"
      },
      "Action": "sts:AssumeRole"
    }
  ]
}
`});
const emrProfile = new aws.iam.InstanceProfile("emrProfile", {roles: [iamEmrProfileRole.name]});
// Security group used for both the master and the core/slave nodes.
// It is declared before the cluster because the cluster references its id.
const allowAccess = new aws.ec2.SecurityGroup("allowAccess", {
    description: "Allow inbound traffic",
    vpcId: mainVpc.id,
    ingress: [{
        fromPort: 0,
        toPort: 0,
        protocol: "-1",
        // cidrBlocks is a list, so the VPC CIDR has to be wrapped in an array.
        cidrBlocks: [mainVpc.cidrBlock],
    }],
    egress: [{
        fromPort: 0,
        toPort: 0,
        protocol: "-1",
        cidrBlocks: ["0.0.0.0/0"],
    }],
    tags: {
        name: "emr_test",
    },
}, {
    // dependsOn takes resource objects, not Terraform-style address strings.
    dependsOn: [mainSubnet],
});
const cluster = new aws.emr.Cluster("cluster", {
    releaseLabel: "emr-4.6.0",
    applications: ["Spark"],
    ec2Attributes: {
        subnetId: mainSubnet.id,
        emrManagedMasterSecurityGroup: allowAccess.id,
        emrManagedSlaveSecurityGroup: allowAccess.id,
        instanceProfile: emrProfile.arn,
    },
    masterInstanceType: "m5.xlarge",
    coreInstanceType: "m5.xlarge",
    coreInstanceCount: 1,
    tags: {
        role: "rolename",
        dns_zone: "env_zone",
        env: "env",
        name: "name-env",
    },
    bootstrapActions: [{
        path: "s3://elasticmapreduce/bootstrap-actions/run-if",
        name: "runif",
        args: [
            "instance.isMaster=true",
            "echo running on master node",
        ],
    }],
    configurationsJson: `  [
    {
      "Classification": "hadoop-env",
      "Configurations": [
        {
          "Classification": "export",
          "Properties": {
            "JAVA_HOME": "/usr/lib/jvm/java-1.8.0"
          }
        }
      ],
      "Properties": {}
    },
    {
      "Classification": "spark-env",
      "Configurations": [
        {
          "Classification": "export",
          "Properties": {
            "JAVA_HOME": "/usr/lib/jvm/java-1.8.0"
          }
        }
      ],
      "Properties": {}
    }
  ]
`,
    serviceRole: iamEmrServiceRole.arn,
});
// Internet access: gateway, a route table with a default route through it,
// and its association as the VPC's main route table.
const gw = new aws.ec2.InternetGateway("gw", {vpcId: mainVpc.id});
const routeTable = new aws.ec2.RouteTable("routeTable", {
    vpcId: mainVpc.id,
    routes: [{
        cidrBlock: "0.0.0.0/0",
        gatewayId: gw.id,
    }],
});
const mainRouteTableAssociation = new aws.ec2.MainRouteTableAssociation("mainRouteTableAssociation", {
    vpcId: mainVpc.id,
    routeTableId: routeTable.id,
});
// Inline policy attached to the EMR service role.
const iamEmrServicePolicy = new aws.iam.RolePolicy("iamEmrServicePolicy", {
    role: iamEmrServiceRole.id,
    policy: `{
    "Version": "2012-10-17",
    "Statement": [{
        "Effect": "Allow",
        "Resource": "*",
        "Action": [
            "ec2:AuthorizeSecurityGroupEgress",
            "ec2:AuthorizeSecurityGroupIngress",
            "ec2:CancelSpotInstanceRequests",
            "ec2:CreateNetworkInterface",
            "ec2:CreateSecurityGroup",
            "ec2:CreateTags",
            "ec2:DeleteNetworkInterface",
            "ec2:DeleteSecurityGroup",
            "ec2:DeleteTags",
            "ec2:DescribeAvailabilityZones",
            "ec2:DescribeAccountAttributes",
            "ec2:DescribeDhcpOptions",
            "ec2:DescribeInstanceStatus",
            "ec2:DescribeInstances",
            "ec2:DescribeKeyPairs",
            "ec2:DescribeNetworkAcls",
            "ec2:DescribeNetworkInterfaces",
            "ec2:DescribePrefixLists",
            "ec2:DescribeRouteTables",
            "ec2:DescribeSecurityGroups",
            "ec2:DescribeSpotInstanceRequests",
            "ec2:DescribeSpotPriceHistory",
            "ec2:DescribeSubnets",
            "ec2:DescribeVpcAttribute",
            "ec2:DescribeVpcEndpoints",
            "ec2:DescribeVpcEndpointServices",
            "ec2:DescribeVpcs",
            "ec2:DetachNetworkInterface",
            "ec2:ModifyImageAttribute",
            "ec2:ModifyInstanceAttribute",
            "ec2:RequestSpotInstances",
            "ec2:RevokeSecurityGroupEgress",
            "ec2:RunInstances",
            "ec2:TerminateInstances",
            "ec2:DeleteVolume",
            "ec2:DescribeVolumeStatus",
            "ec2:DescribeVolumes",
            "ec2:DetachVolume",
            "iam:GetRole",
            "iam:GetRolePolicy",
            "iam:ListInstanceProfiles",
            "iam:ListRolePolicies",
            "iam:PassRole",
            "s3:CreateBucket",
            "s3:Get*",
            "s3:List*",
            "sdb:BatchPutAttributes",
            "sdb:Select",
            "sqs:CreateQueue",
            "sqs:Delete*",
            "sqs:GetQueue*",
            "sqs:PurgeQueue",
            "sqs:ReceiveMessage"
        ]
    }]
}
`,
});
// Inline policy attached to the role behind the EC2 instance profile.
const iamEmrProfilePolicy = new aws.iam.RolePolicy("iamEmrProfilePolicy", {
    role: iamEmrProfileRole.id,
    policy: `{
    "Version": "2012-10-17",
    "Statement": [{
        "Effect": "Allow",
        "Resource": "*",
        "Action": [
            "cloudwatch:*",
            "dynamodb:*",
            "ec2:Describe*",
            "elasticmapreduce:Describe*",
            "elasticmapreduce:ListBootstrapActions",
            "elasticmapreduce:ListClusters",
            "elasticmapreduce:ListInstanceGroups",
            "elasticmapreduce:ListInstances",
            "elasticmapreduce:ListSteps",
            "kinesis:CreateStream",
            "kinesis:DeleteStream",
            "kinesis:DescribeStream",
            "kinesis:GetRecords",
            "kinesis:GetShardIterator",
            "kinesis:MergeShards",
            "kinesis:PutRecord",
            "kinesis:SplitShard",
            "rds:Describe*",
            "s3:*",
            "sdb:*",
            "sns:*",
            "sqs:*"
        ]
    }]
}
`,
});
import pulumi
import pulumi_aws as aws

# Minimal bootable EMR cluster: a VPC with one subnet, the IAM roles EMR needs,
# a security group for the nodes, the cluster itself, internet routing, and the
# inline IAM policies attached to both roles.

# Network: a dedicated VPC (DNS hostnames enabled) and the subnet the cluster runs in.
main_vpc = aws.ec2.Vpc("mainVpc",
    cidr_block="168.31.0.0/16",
    enable_dns_hostnames=True,
    tags={
        "name": "emr_test",
    })
main_subnet = aws.ec2.Subnet("mainSubnet",
    vpc_id=main_vpc.id,
    cidr_block="168.31.0.0/20",
    tags={
        "name": "emr_test",
    })
# IAM role for EMR Service
iam_emr_service_role = aws.iam.Role("iamEmrServiceRole", assume_role_policy="""{
  "Version": "2008-10-17",
  "Statement": [
    {
      "Sid": "",
      "Effect": "Allow",
      "Principal": {
        "Service": "elasticmapreduce.amazonaws.com"
      },
      "Action": "sts:AssumeRole"
    }
  ]
}
""")
# IAM Role for EC2 Instance Profile
iam_emr_profile_role = aws.iam.Role("iamEmrProfileRole", assume_role_policy="""{
  "Version": "2008-10-17",
  "Statement": [
    {
      "Sid": "",
      "Effect": "Allow",
      "Principal": {
        "Service": "ec2.amazonaws.com"
      },
      "Action": "sts:AssumeRole"
    }
  ]
}
""")
emr_profile = aws.iam.InstanceProfile("emrProfile", roles=[iam_emr_profile_role.name])
# Security group used for both the master and the core/slave nodes.
# It is created before the cluster because the cluster references its id.
allow_access = aws.ec2.SecurityGroup("allowAccess",
    description="Allow inbound traffic",
    vpc_id=main_vpc.id,
    ingress=[{
        "from_port": 0,
        "to_port": 0,
        "protocol": "-1",
        # cidr_blocks is a list, so the VPC CIDR has to be wrapped in one.
        "cidr_blocks": [main_vpc.cidr_block],
    }],
    egress=[{
        "from_port": 0,
        "to_port": 0,
        "protocol": "-1",
        "cidr_blocks": ["0.0.0.0/0"],
    }],
    tags={
        "name": "emr_test",
    },
    # depends_on takes resource objects, not Terraform-style address strings.
    opts=pulumi.ResourceOptions(depends_on=[main_subnet]))
cluster = aws.emr.Cluster("cluster",
    release_label="emr-4.6.0",
    applications=["Spark"],
    # NOTE(review): key casing below is mixed (snake_case / camelCase) exactly as in
    # the original example; confirm against the provider version in use.
    ec2_attributes={
        "subnet_id": main_subnet.id,
        "emrManagedMasterSecurityGroup": allow_access.id,
        "emrManagedSlaveSecurityGroup": allow_access.id,
        "instanceProfile": emr_profile.arn,
    },
    master_instance_type="m5.xlarge",
    core_instance_type="m5.xlarge",
    core_instance_count=1,
    tags={
        "role": "rolename",
        "dns_zone": "env_zone",
        "env": "env",
        "name": "name-env",
    },
    bootstrap_actions=[{
        "path": "s3://elasticmapreduce/bootstrap-actions/run-if",
        "name": "runif",
        "args": [
            "instance.isMaster=true",
            "echo running on master node",
        ],
    }],
    configurations_json="""  [
    {
      "Classification": "hadoop-env",
      "Configurations": [
        {
          "Classification": "export",
          "Properties": {
            "JAVA_HOME": "/usr/lib/jvm/java-1.8.0"
          }
        }
      ],
      "Properties": {}
    },
    {
      "Classification": "spark-env",
      "Configurations": [
        {
          "Classification": "export",
          "Properties": {
            "JAVA_HOME": "/usr/lib/jvm/java-1.8.0"
          }
        }
      ],
      "Properties": {}
    }
  ]
""",
    service_role=iam_emr_service_role.arn)
# Internet access: gateway, a route table with a default route through it,
# and its association as the VPC's main route table.
gw = aws.ec2.InternetGateway("gw", vpc_id=main_vpc.id)
route_table = aws.ec2.RouteTable("routeTable",
    vpc_id=main_vpc.id,
    routes=[{
        "cidr_block": "0.0.0.0/0",
        "gateway_id": gw.id,
    }])
main_route_table_association = aws.ec2.MainRouteTableAssociation("mainRouteTableAssociation",
    vpc_id=main_vpc.id,
    route_table_id=route_table.id)
# Inline policy attached to the EMR service role.
iam_emr_service_policy = aws.iam.RolePolicy("iamEmrServicePolicy",
    role=iam_emr_service_role.id,
    policy="""{
    "Version": "2012-10-17",
    "Statement": [{
        "Effect": "Allow",
        "Resource": "*",
        "Action": [
            "ec2:AuthorizeSecurityGroupEgress",
            "ec2:AuthorizeSecurityGroupIngress",
            "ec2:CancelSpotInstanceRequests",
            "ec2:CreateNetworkInterface",
            "ec2:CreateSecurityGroup",
            "ec2:CreateTags",
            "ec2:DeleteNetworkInterface",
            "ec2:DeleteSecurityGroup",
            "ec2:DeleteTags",
            "ec2:DescribeAvailabilityZones",
            "ec2:DescribeAccountAttributes",
            "ec2:DescribeDhcpOptions",
            "ec2:DescribeInstanceStatus",
            "ec2:DescribeInstances",
            "ec2:DescribeKeyPairs",
            "ec2:DescribeNetworkAcls",
            "ec2:DescribeNetworkInterfaces",
            "ec2:DescribePrefixLists",
            "ec2:DescribeRouteTables",
            "ec2:DescribeSecurityGroups",
            "ec2:DescribeSpotInstanceRequests",
            "ec2:DescribeSpotPriceHistory",
            "ec2:DescribeSubnets",
            "ec2:DescribeVpcAttribute",
            "ec2:DescribeVpcEndpoints",
            "ec2:DescribeVpcEndpointServices",
            "ec2:DescribeVpcs",
            "ec2:DetachNetworkInterface",
            "ec2:ModifyImageAttribute",
            "ec2:ModifyInstanceAttribute",
            "ec2:RequestSpotInstances",
            "ec2:RevokeSecurityGroupEgress",
            "ec2:RunInstances",
            "ec2:TerminateInstances",
            "ec2:DeleteVolume",
            "ec2:DescribeVolumeStatus",
            "ec2:DescribeVolumes",
            "ec2:DetachVolume",
            "iam:GetRole",
            "iam:GetRolePolicy",
            "iam:ListInstanceProfiles",
            "iam:ListRolePolicies",
            "iam:PassRole",
            "s3:CreateBucket",
            "s3:Get*",
            "s3:List*",
            "sdb:BatchPutAttributes",
            "sdb:Select",
            "sqs:CreateQueue",
            "sqs:Delete*",
            "sqs:GetQueue*",
            "sqs:PurgeQueue",
            "sqs:ReceiveMessage"
        ]
    }]
}
""")
# Inline policy attached to the role behind the EC2 instance profile.
iam_emr_profile_policy = aws.iam.RolePolicy("iamEmrProfilePolicy",
    role=iam_emr_profile_role.id,
    policy="""{
    "Version": "2012-10-17",
    "Statement": [{
        "Effect": "Allow",
        "Resource": "*",
        "Action": [
            "cloudwatch:*",
            "dynamodb:*",
            "ec2:Describe*",
            "elasticmapreduce:Describe*",
            "elasticmapreduce:ListBootstrapActions",
            "elasticmapreduce:ListClusters",
            "elasticmapreduce:ListInstanceGroups",
            "elasticmapreduce:ListInstances",
            "elasticmapreduce:ListSteps",
            "kinesis:CreateStream",
            "kinesis:DeleteStream",
            "kinesis:DescribeStream",
            "kinesis:GetRecords",
            "kinesis:GetShardIterator",
            "kinesis:MergeShards",
            "kinesis:PutRecord",
            "kinesis:SplitShard",
            "rds:Describe*",
            "s3:*",
            "sdb:*",
            "sns:*",
            "sqs:*"
        ]
    }]
}
""")
using Pulumi;
using Aws = Pulumi.Aws;

/// <summary>
/// Minimal bootable EMR cluster: a VPC with one subnet, the IAM roles EMR needs,
/// a security group for the nodes, the cluster itself, internet routing, and the
/// inline IAM policies attached to both roles.
/// </summary>
class MyStack : Stack
{
    public MyStack()
    {
        // Network: a dedicated VPC (DNS hostnames enabled) and the subnet the cluster runs in.
        var mainVpc = new Aws.Ec2.Vpc("mainVpc", new Aws.Ec2.VpcArgs
        {
            CidrBlock = "168.31.0.0/16",
            EnableDnsHostnames = true,
            Tags =
            {
                { "name", "emr_test" },
            },
        });
        var mainSubnet = new Aws.Ec2.Subnet("mainSubnet", new Aws.Ec2.SubnetArgs
        {
            VpcId = mainVpc.Id,
            CidrBlock = "168.31.0.0/20",
            Tags =
            {
                { "name", "emr_test" },
            },
        });
        // IAM role for EMR Service
        var iamEmrServiceRole = new Aws.Iam.Role("iamEmrServiceRole", new Aws.Iam.RoleArgs
        {
            AssumeRolePolicy = @"{
  ""Version"": ""2008-10-17"",
  ""Statement"": [
    {
      ""Sid"": """",
      ""Effect"": ""Allow"",
      ""Principal"": {
        ""Service"": ""elasticmapreduce.amazonaws.com""
      },
      ""Action"": ""sts:AssumeRole""
    }
  ]
}
",
        });
        // IAM Role for EC2 Instance Profile
        var iamEmrProfileRole = new Aws.Iam.Role("iamEmrProfileRole", new Aws.Iam.RoleArgs
        {
            AssumeRolePolicy = @"{
  ""Version"": ""2008-10-17"",
  ""Statement"": [
    {
      ""Sid"": """",
      ""Effect"": ""Allow"",
      ""Principal"": {
        ""Service"": ""ec2.amazonaws.com""
      },
      ""Action"": ""sts:AssumeRole""
    }
  ]
}
",
        });
        var emrProfile = new Aws.Iam.InstanceProfile("emrProfile", new Aws.Iam.InstanceProfileArgs
        {
            Roles =
            {
                iamEmrProfileRole.Name,
            },
        });
        // Security group used for both the master and the core/slave nodes.
        // It is created before the cluster because the cluster references its Id.
        var allowAccess = new Aws.Ec2.SecurityGroup("allowAccess", new Aws.Ec2.SecurityGroupArgs
        {
            Description = "Allow inbound traffic",
            VpcId = mainVpc.Id,
            Ingress =
            {
                new Aws.Ec2.Inputs.SecurityGroupIngressArgs
                {
                    FromPort = 0,
                    ToPort = 0,
                    Protocol = "-1",
                    // CidrBlocks is a list, so the VPC CIDR is added as a list element.
                    CidrBlocks =
                    {
                        mainVpc.CidrBlock,
                    },
                },
            },
            Egress =
            {
                new Aws.Ec2.Inputs.SecurityGroupEgressArgs
                {
                    FromPort = 0,
                    ToPort = 0,
                    Protocol = "-1",
                    CidrBlocks =
                    {
                        "0.0.0.0/0",
                    },
                },
            },
            Tags =
            {
                { "name", "emr_test" },
            },
        }, new CustomResourceOptions
        {
            // DependsOn takes resource objects, not Terraform-style address strings.
            DependsOn =
            {
                mainSubnet,
            },
        });
        var cluster = new Aws.Emr.Cluster("cluster", new Aws.Emr.ClusterArgs
        {
            ReleaseLabel = "emr-4.6.0",
            Applications =
            {
                "Spark",
            },
            Ec2Attributes = new Aws.Emr.Inputs.ClusterEc2AttributesArgs
            {
                SubnetId = mainSubnet.Id,
                EmrManagedMasterSecurityGroup = allowAccess.Id,
                EmrManagedSlaveSecurityGroup = allowAccess.Id,
                InstanceProfile = emrProfile.Arn,
            },
            MasterInstanceType = "m5.xlarge",
            CoreInstanceType = "m5.xlarge",
            CoreInstanceCount = 1,
            Tags =
            {
                { "role", "rolename" },
                { "dns_zone", "env_zone" },
                { "env", "env" },
                { "name", "name-env" },
            },
            BootstrapActions =
            {
                new Aws.Emr.Inputs.ClusterBootstrapActionArgs
                {
                    Path = "s3://elasticmapreduce/bootstrap-actions/run-if",
                    Name = "runif",
                    Args =
                    {
                        "instance.isMaster=true",
                        "echo running on master node",
                    },
                },
            },
            ConfigurationsJson = @"  [
    {
      ""Classification"": ""hadoop-env"",
      ""Configurations"": [
        {
          ""Classification"": ""export"",
          ""Properties"": {
            ""JAVA_HOME"": ""/usr/lib/jvm/java-1.8.0""
          }
        }
      ],
      ""Properties"": {}
    },
    {
      ""Classification"": ""spark-env"",
      ""Configurations"": [
        {
          ""Classification"": ""export"",
          ""Properties"": {
            ""JAVA_HOME"": ""/usr/lib/jvm/java-1.8.0""
          }
        }
      ],
      ""Properties"": {}
    }
  ]
",
            ServiceRole = iamEmrServiceRole.Arn,
        });
        // Internet access: gateway, a route table with a default route through it,
        // and its association as the VPC's main route table.
        var gw = new Aws.Ec2.InternetGateway("gw", new Aws.Ec2.InternetGatewayArgs
        {
            VpcId = mainVpc.Id,
        });
        var routeTable = new Aws.Ec2.RouteTable("routeTable", new Aws.Ec2.RouteTableArgs
        {
            VpcId = mainVpc.Id,
            Routes =
            {
                new Aws.Ec2.Inputs.RouteTableRouteArgs
                {
                    CidrBlock = "0.0.0.0/0",
                    GatewayId = gw.Id,
                },
            },
        });
        var mainRouteTableAssociation = new Aws.Ec2.MainRouteTableAssociation("mainRouteTableAssociation", new Aws.Ec2.MainRouteTableAssociationArgs
        {
            VpcId = mainVpc.Id,
            RouteTableId = routeTable.Id,
        });
        // Inline policy attached to the EMR service role.
        var iamEmrServicePolicy = new Aws.Iam.RolePolicy("iamEmrServicePolicy", new Aws.Iam.RolePolicyArgs
        {
            Role = iamEmrServiceRole.Id,
            Policy = @"{
    ""Version"": ""2012-10-17"",
    ""Statement"": [{
        ""Effect"": ""Allow"",
        ""Resource"": ""*"",
        ""Action"": [
            ""ec2:AuthorizeSecurityGroupEgress"",
            ""ec2:AuthorizeSecurityGroupIngress"",
            ""ec2:CancelSpotInstanceRequests"",
            ""ec2:CreateNetworkInterface"",
            ""ec2:CreateSecurityGroup"",
            ""ec2:CreateTags"",
            ""ec2:DeleteNetworkInterface"",
            ""ec2:DeleteSecurityGroup"",
            ""ec2:DeleteTags"",
            ""ec2:DescribeAvailabilityZones"",
            ""ec2:DescribeAccountAttributes"",
            ""ec2:DescribeDhcpOptions"",
            ""ec2:DescribeInstanceStatus"",
            ""ec2:DescribeInstances"",
            ""ec2:DescribeKeyPairs"",
            ""ec2:DescribeNetworkAcls"",
            ""ec2:DescribeNetworkInterfaces"",
            ""ec2:DescribePrefixLists"",
            ""ec2:DescribeRouteTables"",
            ""ec2:DescribeSecurityGroups"",
            ""ec2:DescribeSpotInstanceRequests"",
            ""ec2:DescribeSpotPriceHistory"",
            ""ec2:DescribeSubnets"",
            ""ec2:DescribeVpcAttribute"",
            ""ec2:DescribeVpcEndpoints"",
            ""ec2:DescribeVpcEndpointServices"",
            ""ec2:DescribeVpcs"",
            ""ec2:DetachNetworkInterface"",
            ""ec2:ModifyImageAttribute"",
            ""ec2:ModifyInstanceAttribute"",
            ""ec2:RequestSpotInstances"",
            ""ec2:RevokeSecurityGroupEgress"",
            ""ec2:RunInstances"",
            ""ec2:TerminateInstances"",
            ""ec2:DeleteVolume"",
            ""ec2:DescribeVolumeStatus"",
            ""ec2:DescribeVolumes"",
            ""ec2:DetachVolume"",
            ""iam:GetRole"",
            ""iam:GetRolePolicy"",
            ""iam:ListInstanceProfiles"",
            ""iam:ListRolePolicies"",
            ""iam:PassRole"",
            ""s3:CreateBucket"",
            ""s3:Get*"",
            ""s3:List*"",
            ""sdb:BatchPutAttributes"",
            ""sdb:Select"",
            ""sqs:CreateQueue"",
            ""sqs:Delete*"",
            ""sqs:GetQueue*"",
            ""sqs:PurgeQueue"",
            ""sqs:ReceiveMessage""
        ]
    }]
}
",
        });
        // Inline policy attached to the role behind the EC2 instance profile.
        var iamEmrProfilePolicy = new Aws.Iam.RolePolicy("iamEmrProfilePolicy", new Aws.Iam.RolePolicyArgs
        {
            Role = iamEmrProfileRole.Id,
            Policy = @"{
    ""Version"": ""2012-10-17"",
    ""Statement"": [{
        ""Effect"": ""Allow"",
        ""Resource"": ""*"",
        ""Action"": [
            ""cloudwatch:*"",
            ""dynamodb:*"",
            ""ec2:Describe*"",
            ""elasticmapreduce:Describe*"",
            ""elasticmapreduce:ListBootstrapActions"",
            ""elasticmapreduce:ListClusters"",
            ""elasticmapreduce:ListInstanceGroups"",
            ""elasticmapreduce:ListInstances"",
            ""elasticmapreduce:ListSteps"",
            ""kinesis:CreateStream"",
            ""kinesis:DeleteStream"",
            ""kinesis:DescribeStream"",
            ""kinesis:GetRecords"",
            ""kinesis:GetShardIterator"",
            ""kinesis:MergeShards"",
            ""kinesis:PutRecord"",
            ""kinesis:SplitShard"",
            ""rds:Describe*"",
            ""s3:*"",
            ""sdb:*"",
            ""sns:*"",
            ""sqs:*""
        ]
    }]
}
",
        });
    }

}
package main

import (
	"fmt"

	"github.com/pulumi/pulumi-aws/sdk/v2/go/aws/ec2"
	"github.com/pulumi/pulumi-aws/sdk/v2/go/aws/emr"
	"github.com/pulumi/pulumi-aws/sdk/v2/go/aws/iam"
	"github.com/pulumi/pulumi/sdk/v2/go/pulumi"
)

func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		mainVpc, err := ec2.NewVpc(ctx, "mainVpc", &ec2.VpcArgs{
			CidrBlock:          pulumi.String("168.31.0.0/16"),
			EnableDnsHostnames: pulumi.Bool(true),
			Tags: pulumi.StringMap{
				"name": pulumi.String("emr_test"),
			},
		})
		if err != nil {
			return err
		}
		mainSubnet, err := ec2.NewSubnet(ctx, "mainSubnet", &ec2.SubnetArgs{
			VpcId:     mainVpc.ID(),
			CidrBlock: pulumi.String("168.31.0.0/20"),
			Tags: pulumi.StringMap{
				"name": pulumi.String("emr_test"),
			},
		})
		if err != nil {
			return err
		}
		iamEmrServiceRole, err := iam.NewRole(ctx, "iamEmrServiceRole", &iam.RoleArgs{
			AssumeRolePolicy: pulumi.String(fmt.Sprintf("%v%v%v%v%v%v%v%v%v%v%v%v%v", "{\n", "  \"Version\": \"2008-10-17\",\n", "  \"Statement\": [\n", "    {\n", "      \"Sid\": \"\",\n", "      \"Effect\": \"Allow\",\n", "      \"Principal\": {\n", "        \"Service\": \"elasticmapreduce.amazonaws.com\"\n", "      },\n", "      \"Action\": \"sts:AssumeRole\"\n", "    }\n", "  ]\n", "}\n")),
		})
		if err != nil {
			return err
		}
		iamEmrProfileRole, err := iam.NewRole(ctx, "iamEmrProfileRole", &iam.RoleArgs{
			AssumeRolePolicy: pulumi.String(fmt.Sprintf("%v%v%v%v%v%v%v%v%v%v%v%v%v", "{\n", "  \"Version\": \"2008-10-17\",\n", "  \"Statement\": [\n", "    {\n", "      \"Sid\": \"\",\n", "      \"Effect\": \"Allow\",\n", "      \"Principal\": {\n", "        \"Service\": \"ec2.amazonaws.com\"\n", "      },\n", "      \"Action\": \"sts:AssumeRole\"\n", "    }\n", "  ]\n", "}\n")),
		})
		if err != nil {
			return err
		}
		emrProfile, err := iam.NewInstanceProfile(ctx, "emrProfile", &iam.InstanceProfileArgs{
			Roles: pulumi.StringArray{
				iamEmrProfileRole.Name,
			},
		})
		if err != nil {
			return err
		}
		_, err = emr.NewCluster(ctx, "cluster", &emr.ClusterArgs{
			ReleaseLabel: pulumi.String("emr-4.6.0"),
			Applications: pulumi.StringArray{
				pulumi.String("Spark"),
			},
			Ec2Attributes: &emr.ClusterEc2AttributesArgs{
				SubnetId:                      mainSubnet.ID(),
				EmrManagedMasterSecurityGroup: pulumi.String(aws_security_group.Allow_all.Id),
				EmrManagedSlaveSecurityGroup:  pulumi.String(aws_security_group.Allow_all.Id),
				InstanceProfile:               emrProfile.Arn,
			},
			MasterInstanceType: pulumi.String("m5.xlarge"),
			CoreInstanceType:   pulumi.String("m5.xlarge"),
			CoreInstanceCount:  pulumi.Int(1),
			Tags: pulumi.StringMap{
				"role":     pulumi.String("rolename"),
				"dns_zone": pulumi.String("env_zone"),
				"env":      pulumi.String("env"),
				"name":     pulumi.String("name-env"),
			},
			BootstrapActions: emr.ClusterBootstrapActionArray{
				&emr.ClusterBootstrapActionArgs{
					Path: pulumi.String("s3://elasticmapreduce/bootstrap-actions/run-if"),
					Name: pulumi.String("runif"),
					Args: pulumi.StringArray{
						pulumi.String("instance.isMaster=true"),
						pulumi.String("echo running on master node"),
					},
				},
			},
			ConfigurationsJson: pulumi.String(fmt.Sprintf("%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v", "  [\n", "    {\n", "      \"Classification\": \"hadoop-env\",\n", "      \"Configurations\": [\n", "        {\n", "          \"Classification\": \"export\",\n", "          \"Properties\": {\n", "            \"JAVA_HOME\": \"/usr/lib/jvm/java-1.8.0\"\n", "          }\n", "        }\n", "      ],\n", "      \"Properties\": {}\n", "    },\n", "    {\n", "      \"Classification\": \"spark-env\",\n", "      \"Configurations\": [\n", "        {\n", "          \"Classification\": \"export\",\n", "          \"Properties\": {\n", "            \"JAVA_HOME\": \"/usr/lib/jvm/java-1.8.0\"\n", "          }\n", "        }\n", "      ],\n", "      \"Properties\": {}\n", "    }\n", "  ]\n")),
			ServiceRole:        iamEmrServiceRole.Arn,
		})
		if err != nil {
			return err
		}
		_, err = ec2.NewSecurityGroup(ctx, "allowAccess", &ec2.SecurityGroupArgs{
			Description: pulumi.String("Allow inbound traffic"),
			VpcId:       mainVpc.ID(),
			Ingress: ec2.SecurityGroupIngressArray{
				&ec2.SecurityGroupIngressArgs{
					FromPort:   pulumi.Int(0),
					ToPort:     pulumi.Int(0),
					Protocol:   pulumi.String("-1"),
					CidrBlocks: mainVpc.CidrBlock,
				},
			},
			Egress: ec2.SecurityGroupEgressArray{
				&ec2.SecurityGroupEgressArgs{
					FromPort: pulumi.Int(0),
					ToPort:   pulumi.Int(0),
					Protocol: pulumi.String("-1"),
					CidrBlocks: pulumi.StringArray{
						pulumi.String("0.0.0.0/0"),
					},
				},
			},
			Tags: pulumi.StringMap{
				"name": pulumi.String("emr_test"),
			},
		}, pulumi.DependsOn([]pulumi.Resource{
			"aws_subnet.main",
		}))
		if err != nil {
			return err
		}
		gw, err := ec2.NewInternetGateway(ctx, "gw", &ec2.InternetGatewayArgs{
			VpcId: mainVpc.ID(),
		})
		if err != nil {
			return err
		}
		routeTable, err := ec2.NewRouteTable(ctx, "routeTable", &ec2.RouteTableArgs{
			VpcId: mainVpc.ID(),
			Routes: ec2.RouteTableRouteArray{
				&ec2.RouteTableRouteArgs{
					CidrBlock: pulumi.String("0.0.0.0/0"),
					GatewayId: gw.ID(),
				},
			},
		})
		if err != nil {
			return err
		}
		_, err = ec2.NewMainRouteTableAssociation(ctx, "mainRouteTableAssociation", &ec2.MainRouteTableAssociationArgs{
			VpcId:        mainVpc.ID(),
			RouteTableId: routeTable.ID(),
		})
		if err != nil {
			return err
		}
		_, err = iam.NewRolePolicy(ctx, "iamEmrServicePolicy", &iam.RolePolicyArgs{
			Role:   iamEmrServiceRole.ID(),
			Policy: pulumi.String(fmt.Sprintf("%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v", "{\n", "    \"Version\": \"2012-10-17\",\n", "    \"Statement\": [{\n", "        \"Effect\": \"Allow\",\n", "        \"Resource\": \"*\",\n", "        \"Action\": [\n", "            \"ec2:AuthorizeSecurityGroupEgress\",\n", "            \"ec2:AuthorizeSecurityGroupIngress\",\n", "            \"ec2:CancelSpotInstanceRequests\",\n", "            \"ec2:CreateNetworkInterface\",\n", "            \"ec2:CreateSecurityGroup\",\n", "            \"ec2:CreateTags\",\n", "            \"ec2:DeleteNetworkInterface\",\n", "            \"ec2:DeleteSecurityGroup\",\n", "            \"ec2:DeleteTags\",\n", "            \"ec2:DescribeAvailabilityZones\",\n", "            \"ec2:DescribeAccountAttributes\",\n", "            \"ec2:DescribeDhcpOptions\",\n", "            \"ec2:DescribeInstanceStatus\",\n", "            \"ec2:DescribeInstances\",\n", "            \"ec2:DescribeKeyPairs\",\n", "            \"ec2:DescribeNetworkAcls\",\n", "            \"ec2:DescribeNetworkInterfaces\",\n", "            \"ec2:DescribePrefixLists\",\n", "            \"ec2:DescribeRouteTables\",\n", "            \"ec2:DescribeSecurityGroups\",\n", "            \"ec2:DescribeSpotInstanceRequests\",\n", "            \"ec2:DescribeSpotPriceHistory\",\n", "            \"ec2:DescribeSubnets\",\n", "            \"ec2:DescribeVpcAttribute\",\n", "            \"ec2:DescribeVpcEndpoints\",\n", "            \"ec2:DescribeVpcEndpointServices\",\n", "            \"ec2:DescribeVpcs\",\n", "            \"ec2:DetachNetworkInterface\",\n", "            \"ec2:ModifyImageAttribute\",\n", "            \"ec2:ModifyInstanceAttribute\",\n", "            \"ec2:RequestSpotInstances\",\n", "            \"ec2:RevokeSecurityGroupEgress\",\n", "            \"ec2:RunInstances\",\n", "            \"ec2:TerminateInstances\",\n", "            \"ec2:DeleteVolume\",\n", "         
   \"ec2:DescribeVolumeStatus\",\n", "            \"ec2:DescribeVolumes\",\n", "            \"ec2:DetachVolume\",\n", "            \"iam:GetRole\",\n", "            \"iam:GetRolePolicy\",\n", "            \"iam:ListInstanceProfiles\",\n", "            \"iam:ListRolePolicies\",\n", "            \"iam:PassRole\",\n", "            \"s3:CreateBucket\",\n", "            \"s3:Get*\",\n", "            \"s3:List*\",\n", "            \"sdb:BatchPutAttributes\",\n", "            \"sdb:Select\",\n", "            \"sqs:CreateQueue\",\n", "            \"sqs:Delete*\",\n", "            \"sqs:GetQueue*\",\n", "            \"sqs:PurgeQueue\",\n", "            \"sqs:ReceiveMessage\"\n", "        ]\n", "    }]\n", "}\n")),
		})
		if err != nil {
			return err
		}
		_, err = iam.NewRolePolicy(ctx, "iamEmrProfilePolicy", &iam.RolePolicyArgs{
			Role:   iamEmrProfileRole.ID(),
			Policy: pulumi.String(fmt.Sprintf("%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v", "{\n", "    \"Version\": \"2012-10-17\",\n", "    \"Statement\": [{\n", "        \"Effect\": \"Allow\",\n", "        \"Resource\": \"*\",\n", "        \"Action\": [\n", "            \"cloudwatch:*\",\n", "            \"dynamodb:*\",\n", "            \"ec2:Describe*\",\n", "            \"elasticmapreduce:Describe*\",\n", "            \"elasticmapreduce:ListBootstrapActions\",\n", "            \"elasticmapreduce:ListClusters\",\n", "            \"elasticmapreduce:ListInstanceGroups\",\n", "            \"elasticmapreduce:ListInstances\",\n", "            \"elasticmapreduce:ListSteps\",\n", "            \"kinesis:CreateStream\",\n", "            \"kinesis:DeleteStream\",\n", "            \"kinesis:DescribeStream\",\n", "            \"kinesis:GetRecords\",\n", "            \"kinesis:GetShardIterator\",\n", "            \"kinesis:MergeShards\",\n", "            \"kinesis:PutRecord\",\n", "            \"kinesis:SplitShard\",\n", "            \"rds:Describe*\",\n", "            \"s3:*\",\n", "            \"sdb:*\",\n", "            \"sns:*\",\n", "            \"sqs:*\"\n", "        ]\n", "    }]\n", "}\n")),
		})
		if err != nil {
			return err
		}
		return nil
	})
}

Example Usage

using Pulumi;
using Aws = Pulumi.Aws;

// Example stack that declares a single EMR cluster running Spark, with one
// bootstrap action, hadoop-env/spark-env configuration overrides, and a core
// instance group that carries an autoscaling policy.
//
// NOTE: aws_security_group, aws_iam_instance_profile, aws_subnet and
// aws_iam_role are referenced below but are not declared in this snippet;
// they must be defined elsewhere for the program to compile.
class MyStack : Stack
{
    public MyStack()
    {
        var cluster = new Aws.Emr.Cluster("cluster", new Aws.Emr.ClusterArgs
        {
            // JSON document carrying proxy settings (host and port).
            AdditionalInfo = @"{
  ""instanceAwsClientConfiguration"": {
    ""proxyPort"": 8099,
    ""proxyHost"": ""myproxy.example.com""
  }
}

",
            Applications = 
            {
                "Spark",
            },
            BootstrapActions = 
            {
                new Aws.Emr.Inputs.ClusterBootstrapActionArgs
                {
                    Args = 
                    {
                        "instance.isMaster=true",
                        "echo running on master node",
                    },
                    Name = "runif",
                    Path = "s3://elasticmapreduce/bootstrap-actions/run-if",
                },
            },
            // JSON list of configurations: sets JAVA_HOME for both the
            // hadoop-env and spark-env classifications.
            ConfigurationsJson = @"  [
    {
      ""Classification"": ""hadoop-env"",
      ""Configurations"": [
        {
          ""Classification"": ""export"",
          ""Properties"": {
            ""JAVA_HOME"": ""/usr/lib/jvm/java-1.8.0""
          }
        }
      ],
      ""Properties"": {}
    },
    {
      ""Classification"": ""spark-env"",
      ""Configurations"": [
        {
          ""Classification"": ""export"",
          ""Properties"": {
            ""JAVA_HOME"": ""/usr/lib/jvm/java-1.8.0""
          }
        }
      ],
      ""Properties"": {}
    }
  ]

",
            CoreInstanceGroup = new Aws.Emr.Inputs.ClusterCoreInstanceGroupArgs
            {
                // JSON autoscaling policy: capacity between 1 and 2, with a
                // single scale-out rule on YARNMemoryAvailablePercentage.
                AutoscalingPolicy = @"{
""Constraints"": {
  ""MinCapacity"": 1,
  ""MaxCapacity"": 2
},
""Rules"": [
  {
    ""Name"": ""ScaleOutMemoryPercentage"",
    ""Description"": ""Scale out if YARNMemoryAvailablePercentage is less than 15"",
    ""Action"": {
      ""SimpleScalingPolicyConfiguration"": {
        ""AdjustmentType"": ""CHANGE_IN_CAPACITY"",
        ""ScalingAdjustment"": 1,
        ""CoolDown"": 300
      }
    },
    ""Trigger"": {
      ""CloudWatchAlarmDefinition"": {
        ""ComparisonOperator"": ""LESS_THAN"",
        ""EvaluationPeriods"": 1,
        ""MetricName"": ""YARNMemoryAvailablePercentage"",
        ""Namespace"": ""AWS/ElasticMapReduce"",
        ""Period"": 300,
        ""Statistic"": ""AVERAGE"",
        ""Threshold"": 15.0,
        ""Unit"": ""PERCENT""
      }
    }
  }
]
}

",
                BidPrice = "0.30",
                EbsConfigs = 
                {
                    new Aws.Emr.Inputs.ClusterCoreInstanceGroupEbsConfigArgs
                    {
                        Size = 40,
                        Type = "gp2",
                        VolumesPerInstance = 1,
                    },
                },
                InstanceCount = 1,
                InstanceType = "c4.large",
            },
            EbsRootVolumeSize = 100,
            // The resources referenced here are not declared in this snippet.
            Ec2Attributes = new Aws.Emr.Inputs.ClusterEc2AttributesArgs
            {
                EmrManagedMasterSecurityGroup = aws_security_group.Sg.Id,
                EmrManagedSlaveSecurityGroup = aws_security_group.Sg.Id,
                InstanceProfile = aws_iam_instance_profile.Emr_profile.Arn,
                SubnetId = aws_subnet.Main.Id,
            },
            KeepJobFlowAliveWhenNoSteps = true,
            MasterInstanceGroup = new Aws.Emr.Inputs.ClusterMasterInstanceGroupArgs
            {
                InstanceType = "m4.large",
            },
            ReleaseLabel = "emr-4.6.0",
            ServiceRole = aws_iam_role.Iam_emr_service_role.Arn,
            Tags = 
            {
                { "env", "env" },
                { "role", "rolename" },
            },
            TerminationProtection = false,
        });
    }

}
package main

import (
    "fmt"

    "github.com/pulumi/pulumi-aws/sdk/v2/go/aws/emr"
    "github.com/pulumi/pulumi/sdk/v2/go/pulumi"
)

// main declares a single EMR cluster running Spark, with one bootstrap
// action, hadoop-env/spark-env configuration overrides, and a core instance
// group that carries an autoscaling policy.
//
// NOTE: aws_security_group, aws_iam_instance_profile, aws_subnet and
// aws_iam_role are referenced below but are not declared in this snippet;
// they must be defined elsewhere for the program to compile.
func main() {
    pulumi.Run(func(ctx *pulumi.Context) error {
        _, err := emr.NewCluster(ctx, "cluster", &emr.ClusterArgs{
            // JSON document carrying proxy settings, assembled line by line.
            AdditionalInfo: pulumi.String(fmt.Sprintf("%v%v%v%v%v%v%v", "{\n", "  \"instanceAwsClientConfiguration\": {\n", "    \"proxyPort\": 8099,\n", "    \"proxyHost\": \"myproxy.example.com\"\n", "  }\n", "}\n", "\n")),
            Applications: pulumi.StringArray{
                pulumi.String("Spark"),
            },
            BootstrapActions: emr.ClusterBootstrapActionArray{
                &emr.ClusterBootstrapActionArgs{
                    Args: pulumi.StringArray{
                        pulumi.String("instance.isMaster=true"),
                        pulumi.String("echo running on master node"),
                    },
                    Name: pulumi.String("runif"),
                    Path: pulumi.String("s3://elasticmapreduce/bootstrap-actions/run-if"),
                },
            },
            // JSON list of configurations: sets JAVA_HOME for both the
            // hadoop-env and spark-env classifications.
            ConfigurationsJson: pulumi.String(fmt.Sprintf("%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v", "  [\n", "    {\n", "      \"Classification\": \"hadoop-env\",\n", "      \"Configurations\": [\n", "        {\n", "          \"Classification\": \"export\",\n", "          \"Properties\": {\n", "            \"JAVA_HOME\": \"/usr/lib/jvm/java-1.8.0\"\n", "          }\n", "        }\n", "      ],\n", "      \"Properties\": {}\n", "    },\n", "    {\n", "      \"Classification\": \"spark-env\",\n", "      \"Configurations\": [\n", "        {\n", "          \"Classification\": \"export\",\n", "          \"Properties\": {\n", "            \"JAVA_HOME\": \"/usr/lib/jvm/java-1.8.0\"\n", "          }\n", "        }\n", "      ],\n", "      \"Properties\": {}\n", "    }\n", "  ]\n", "\n")),
            CoreInstanceGroup: &emr.ClusterCoreInstanceGroupArgs{
                // JSON autoscaling policy: capacity between 1 and 2, with a
                // single scale-out rule on YARNMemoryAvailablePercentage.
                AutoscalingPolicy: pulumi.String(fmt.Sprintf("%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v%v", "{\n", "\"Constraints\": {\n", "  \"MinCapacity\": 1,\n", "  \"MaxCapacity\": 2\n", "},\n", "\"Rules\": [\n", "  {\n", "    \"Name\": \"ScaleOutMemoryPercentage\",\n", "    \"Description\": \"Scale out if YARNMemoryAvailablePercentage is less than 15\",\n", "    \"Action\": {\n", "      \"SimpleScalingPolicyConfiguration\": {\n", "        \"AdjustmentType\": \"CHANGE_IN_CAPACITY\",\n", "        \"ScalingAdjustment\": 1,\n", "        \"CoolDown\": 300\n", "      }\n", "    },\n", "    \"Trigger\": {\n", "      \"CloudWatchAlarmDefinition\": {\n", "        \"ComparisonOperator\": \"LESS_THAN\",\n", "        \"EvaluationPeriods\": 1,\n", "        \"MetricName\": \"YARNMemoryAvailablePercentage\",\n", "        \"Namespace\": \"AWS/ElasticMapReduce\",\n", "        \"Period\": 300,\n", "        \"Statistic\": \"AVERAGE\",\n", "        \"Threshold\": 15.0,\n", "        \"Unit\": \"PERCENT\"\n", "      }\n", "    }\n", "  }\n", "]\n", "}\n", "\n")),
                BidPrice:          pulumi.String("0.30"),
                EbsConfigs: emr.ClusterCoreInstanceGroupEbsConfigArray{
                    &emr.ClusterCoreInstanceGroupEbsConfigArgs{
                        Size:               pulumi.Int(40),
                        Type:               pulumi.String("gp2"),
                        VolumesPerInstance: pulumi.Int(1),
                    },
                },
                InstanceCount: pulumi.Int(1),
                InstanceType:  pulumi.String("c4.large"),
            },
            EbsRootVolumeSize: pulumi.Int(100),
            // The resources referenced here are not declared in this snippet.
            Ec2Attributes: &emr.ClusterEc2AttributesArgs{
                EmrManagedMasterSecurityGroup: pulumi.String(aws_security_group.Sg.Id),
                EmrManagedSlaveSecurityGroup:  pulumi.String(aws_security_group.Sg.Id),
                InstanceProfile:               pulumi.String(aws_iam_instance_profile.Emr_profile.Arn),
                SubnetId:                      pulumi.String(aws_subnet.Main.Id),
            },
            KeepJobFlowAliveWhenNoSteps: pulumi.Bool(true),
            MasterInstanceGroup: &emr.ClusterMasterInstanceGroupArgs{
                InstanceType: pulumi.String("m4.large"),
            },
            ReleaseLabel: pulumi.String("emr-4.6.0"),
            ServiceRole:  pulumi.String(aws_iam_role.Iam_emr_service_role.Arn),
            Tags: pulumi.StringMap{
                "env":  pulumi.String("env"),
                "role": pulumi.String("rolename"),
            },
            TerminationProtection: pulumi.Bool(false),
        })
        if err != nil {
            return err
        }
        return nil
    })
}
import pulumi
import pulumi_aws as aws

# EMR cluster running Spark, with one bootstrap action, hadoop-env/spark-env
# configuration overrides, and a core instance group that carries an
# autoscaling policy.
#
# NOTE: aws_security_group, aws_iam_instance_profile, aws_subnet and
# aws_iam_role are referenced below but are not declared in this snippet;
# they must be defined elsewhere.
cluster = aws.emr.Cluster("cluster",
    # JSON document carrying proxy settings (host and port).
    additional_info="""{
  "instanceAwsClientConfiguration": {
    "proxyPort": 8099,
    "proxyHost": "myproxy.example.com"
  }
}

""",
    applications=["Spark"],
    bootstrap_actions=[{
        "args": [
            "instance.isMaster=true",
            "echo running on master node",
        ],
        "name": "runif",
        "path": "s3://elasticmapreduce/bootstrap-actions/run-if",
    }],
    # JSON list of configurations: sets JAVA_HOME for both the hadoop-env and
    # spark-env classifications.
    configurations_json="""  [
    {
      "Classification": "hadoop-env",
      "Configurations": [
        {
          "Classification": "export",
          "Properties": {
            "JAVA_HOME": "/usr/lib/jvm/java-1.8.0"
          }
        }
      ],
      "Properties": {}
    },
    {
      "Classification": "spark-env",
      "Configurations": [
        {
          "Classification": "export",
          "Properties": {
            "JAVA_HOME": "/usr/lib/jvm/java-1.8.0"
          }
        }
      ],
      "Properties": {}
    }
  ]

""",
    core_instance_group={
        # JSON autoscaling policy: capacity between 1 and 2, with a single
        # scale-out rule on YARNMemoryAvailablePercentage.
        "autoscaling_policy": """{
"Constraints": {
  "MinCapacity": 1,
  "MaxCapacity": 2
},
"Rules": [
  {
    "Name": "ScaleOutMemoryPercentage",
    "Description": "Scale out if YARNMemoryAvailablePercentage is less than 15",
    "Action": {
      "SimpleScalingPolicyConfiguration": {
        "AdjustmentType": "CHANGE_IN_CAPACITY",
        "ScalingAdjustment": 1,
        "CoolDown": 300
      }
    },
    "Trigger": {
      "CloudWatchAlarmDefinition": {
        "ComparisonOperator": "LESS_THAN",
        "EvaluationPeriods": 1,
        "MetricName": "YARNMemoryAvailablePercentage",
        "Namespace": "AWS/ElasticMapReduce",
        "Period": 300,
        "Statistic": "AVERAGE",
        "Threshold": 15.0,
        "Unit": "PERCENT"
      }
    }
  }
]
}

""",
        "bid_price": "0.30",
        "ebs_configs": [{
            # Volume size is an integer, matching the other language examples.
            "size": 40,
            "type": "gp2",
            "volumesPerInstance": 1,
        }],
        "instance_count": 1,
        "instance_type": "c4.large",
    },
    ebs_root_volume_size=100,
    # The resources referenced here are not declared in this snippet.
    ec2_attributes={
        "emrManagedMasterSecurityGroup": aws_security_group["sg"]["id"],
        "emrManagedSlaveSecurityGroup": aws_security_group["sg"]["id"],
        "instanceProfile": aws_iam_instance_profile["emr_profile"]["arn"],
        "subnet_id": aws_subnet["main"]["id"],
    },
    keep_job_flow_alive_when_no_steps=True,
    master_instance_group={
        "instance_type": "m4.large",
    },
    release_label="emr-4.6.0",
    service_role=aws_iam_role["iam_emr_service_role"]["arn"],
    tags={
        "env": "env",
        "role": "rolename",
    },
    termination_protection=False)
import * as pulumi from "@pulumi/pulumi";
import * as aws from "@pulumi/aws";

// EMR cluster running Spark, with one bootstrap action, hadoop-env/spark-env
// configuration overrides, and a core instance group that carries an
// autoscaling policy.
//
// NOTE: aws_security_group_sg, aws_iam_instance_profile_emr_profile,
// aws_subnet_main and aws_iam_role_iam_emr_service_role are referenced below
// but are not declared in this snippet; they must be defined elsewhere.
const cluster = new aws.emr.Cluster("cluster", {
    // JSON document carrying proxy settings (host and port).
    additionalInfo: `{
  "instanceAwsClientConfiguration": {
    "proxyPort": 8099,
    "proxyHost": "myproxy.example.com"
  }
}
`,
    applications: ["Spark"],
    bootstrapActions: [{
        args: [
            "instance.isMaster=true",
            "echo running on master node",
        ],
        name: "runif",
        path: "s3://elasticmapreduce/bootstrap-actions/run-if",
    }],
    // JSON list of configurations: sets JAVA_HOME for both the hadoop-env and
    // spark-env classifications.
    configurationsJson: `  [
    {
      "Classification": "hadoop-env",
      "Configurations": [
        {
          "Classification": "export",
          "Properties": {
            "JAVA_HOME": "/usr/lib/jvm/java-1.8.0"
          }
        }
      ],
      "Properties": {}
    },
    {
      "Classification": "spark-env",
      "Configurations": [
        {
          "Classification": "export",
          "Properties": {
            "JAVA_HOME": "/usr/lib/jvm/java-1.8.0"
          }
        }
      ],
      "Properties": {}
    }
  ]
`,
    coreInstanceGroup: {
        // JSON autoscaling policy: capacity between 1 and 2, with a single
        // scale-out rule on YARNMemoryAvailablePercentage.
        autoscalingPolicy: `{
"Constraints": {
  "MinCapacity": 1,
  "MaxCapacity": 2
},
"Rules": [
  {
    "Name": "ScaleOutMemoryPercentage",
    "Description": "Scale out if YARNMemoryAvailablePercentage is less than 15",
    "Action": {
      "SimpleScalingPolicyConfiguration": {
        "AdjustmentType": "CHANGE_IN_CAPACITY",
        "ScalingAdjustment": 1,
        "CoolDown": 300
      }
    },
    "Trigger": {
      "CloudWatchAlarmDefinition": {
        "ComparisonOperator": "LESS_THAN",
        "EvaluationPeriods": 1,
        "MetricName": "YARNMemoryAvailablePercentage",
        "Namespace": "AWS/ElasticMapReduce",
        "Period": 300,
        "Statistic": "AVERAGE",
        "Threshold": 15.0,
        "Unit": "PERCENT"
      }
    }
  }
]
}
`,
        bidPrice: "0.30",
        ebsConfigs: [{
            size: 40,
            type: "gp2",
            volumesPerInstance: 1,
        }],
        instanceCount: 1,
        instanceType: "c4.large",
    },
    ebsRootVolumeSize: 100,
    // The resources referenced here are not declared in this snippet.
    ec2Attributes: {
        emrManagedMasterSecurityGroup: aws_security_group_sg.id,
        emrManagedSlaveSecurityGroup: aws_security_group_sg.id,
        instanceProfile: aws_iam_instance_profile_emr_profile.arn,
        subnetId: aws_subnet_main.id,
    },
    keepJobFlowAliveWhenNoSteps: true,
    masterInstanceGroup: {
        instanceType: "m4.large",
    },
    releaseLabel: "emr-4.6.0",
    serviceRole: aws_iam_role_iam_emr_service_role.arn,
    tags: {
        env: "env",
        role: "rolename",
    },
    terminationProtection: false,
});

Enable Debug Logging

C# example: coming soon!

Go example: coming soon!

import pulumi
import pulumi_aws as aws

# Cluster with a single step named "Setup Hadoop Debugging".
#
# Ignoring changes is a resource option, not a Cluster argument, so it is
# passed through `opts` rather than as a `lifecycle` keyword. Ignoring
# changes to the steps is recommended when other steps are managed outside
# of this program.
example = aws.emr.Cluster("example",
    steps=[{
        "actionOnFailure": "TERMINATE_CLUSTER",
        "hadoopJarStep": {
            "args": ["state-pusher-script"],
            "jar": "command-runner.jar",
        },
        "name": "Setup Hadoop Debugging",
    }],
    opts=pulumi.ResourceOptions(ignore_changes=[
        "stepConcurrencyLevel",
        "steps",
    ]))
import * as pulumi from "@pulumi/pulumi";
import * as aws from "@pulumi/aws";

// Single step, named "Setup Hadoop Debugging", that runs command-runner.jar
// with the state-pusher-script argument and terminates the cluster on failure.
const hadoopDebuggingStep = {
    name: "Setup Hadoop Debugging",
    actionOnFailure: "TERMINATE_CLUSTER",
    hadoopJarStep: {
        jar: "command-runner.jar",
        args: ["state-pusher-script"],
    },
};

// Changes to the steps and the step concurrency level are ignored, which is
// recommended when other steps are managed outside of this program.
const example = new aws.emr.Cluster(
    "example",
    { steps: [hadoopDebuggingStep] },
    { ignoreChanges: ["stepConcurrencyLevel", "steps"] },
);

Multiple Node Master Instance Group

using Pulumi;
using Aws = Pulumi.Aws;

// Example stack: an EMR cluster with a multiple-node (3) master instance
// group, placed in a subnet that maps public IPs on launch.
class MyStack : Stack
{
    public MyStack()
    {
        // Map public IP on launch must be enabled for public (Internet accessible) subnets
        var exampleSubnet = new Aws.Ec2.Subnet("exampleSubnet", new Aws.Ec2.SubnetArgs
        {
            MapPublicIpOnLaunch = true,
        });
        var exampleCluster = new Aws.Emr.Cluster("exampleCluster", new Aws.Emr.ClusterArgs
        {
            // core_instance_group must be configured; the original
            // `CoreInstanceGroup = ,` had no value and did not compile.
            CoreInstanceGroup = new Aws.Emr.Inputs.ClusterCoreInstanceGroupArgs(),
            Ec2Attributes = new Aws.Emr.Inputs.ClusterEc2AttributesArgs
            {
                SubnetId = exampleSubnet.Id,
            },
            MasterInstanceGroup = new Aws.Emr.Inputs.ClusterMasterInstanceGroupArgs
            {
                // Master instance count must be set to 3
                InstanceCount = 3,
            },
            // EMR version must be 5.23.0 or later
            ReleaseLabel = "emr-5.24.1",
            // Termination protection is automatically enabled for multiple masters
            // To destroy the cluster, this must be configured to false and applied first
            TerminationProtection = true,
        });
    }

}
package main

import (
    "github.com/pulumi/pulumi-aws/sdk/v2/go/aws/ec2"
    "github.com/pulumi/pulumi-aws/sdk/v2/go/aws/emr"
    "github.com/pulumi/pulumi/sdk/v2/go/pulumi"
)

// main declares an EMR cluster with a multiple-node (3) master instance
// group, placed in a subnet that maps public IPs on launch.
func main() {
    pulumi.Run(func(ctx *pulumi.Context) error {
        // Map public IP on launch must be enabled for public (Internet accessible) subnets
        exampleSubnet, err := ec2.NewSubnet(ctx, "exampleSubnet", &ec2.SubnetArgs{
            MapPublicIpOnLaunch: pulumi.Bool(true),
        })
        if err != nil {
            return err
        }
        _, err = emr.NewCluster(ctx, "exampleCluster", &emr.ClusterArgs{
            // core_instance_group must be configured, so pass an (empty)
            // block instead of nil, which would omit it entirely.
            CoreInstanceGroup: &emr.ClusterCoreInstanceGroupArgs{},
            Ec2Attributes: &emr.ClusterEc2AttributesArgs{
                SubnetId: exampleSubnet.ID(),
            },
            MasterInstanceGroup: &emr.ClusterMasterInstanceGroupArgs{
                // Master instance count must be set to 3
                InstanceCount: pulumi.Int(3),
            },
            // EMR version must be 5.23.0 or later
            ReleaseLabel: pulumi.String("emr-5.24.1"),
            // Termination protection is automatically enabled for multiple masters
            // To destroy the cluster, this must be configured to false and applied first
            TerminationProtection: pulumi.Bool(true),
        })
        if err != nil {
            return err
        }
        return nil
    })
}
import pulumi
import pulumi_aws as aws

# Map public IP on launch must be enabled for public (Internet accessible) subnets
example_subnet = aws.ec2.Subnet("exampleSubnet", map_public_ip_on_launch=True)
example_cluster = aws.emr.Cluster("exampleCluster",
    # core_instance_group must be configured
    core_instance_group={},
    ec2_attributes={
        "subnet_id": example_subnet.id,
    },
    master_instance_group={
        # Master instance count must be set to 3
        "instance_count": 3,
    },
    # EMR version must be 5.23.0 or later
    release_label="emr-5.24.1",
    # Termination protection is automatically enabled for multiple masters
    # To destroy the cluster, this must be configured to false and applied first
    termination_protection=True)
import * as pulumi from "@pulumi/pulumi";
import * as aws from "@pulumi/aws";

// Map public IP on launch must be enabled for public (Internet accessible) subnets
const exampleSubnet = new aws.ec2.Subnet("example", { mapPublicIpOnLaunch: true });

// Requirements for a cluster with multiple master nodes:
//   - core_instance_group must be configured
//   - the master instance count must be set to 3
//   - the EMR version must be 5.23.0 or later
// Termination protection is automatically enabled for multiple masters.
// To destroy the cluster, it must be configured to false and applied first.
const exampleCluster = new aws.emr.Cluster("example", {
    coreInstanceGroup: {},
    ec2Attributes: { subnetId: exampleSubnet.id },
    masterInstanceGroup: { instanceCount: 3 },
    releaseLabel: "emr-5.24.1",
    terminationProtection: true,
});

Create a Cluster Resource

new Cluster(name: string, args: ClusterArgs, opts?: CustomResourceOptions);
def Cluster(resource_name, opts=None, additional_info=None, applications=None, autoscaling_role=None, bootstrap_actions=None, configurations=None, configurations_json=None, core_instance_count=None, core_instance_group=None, core_instance_type=None, custom_ami_id=None, ebs_root_volume_size=None, ec2_attributes=None, instance_groups=None, keep_job_flow_alive_when_no_steps=None, kerberos_attributes=None, log_uri=None, master_instance_group=None, master_instance_type=None, name=None, release_label=None, scale_down_behavior=None, security_configuration=None, service_role=None, step_concurrency_level=None, steps=None, tags=None, termination_protection=None, visible_to_all_users=None, __props__=None);
func NewCluster(ctx *Context, name string, args ClusterArgs, opts ...ResourceOption) (*Cluster, error)
public Cluster(string name, ClusterArgs args, CustomResourceOptions? opts = null)
name string
The unique name of the resource.
args ClusterArgs
The arguments to resource properties.
opts CustomResourceOptions
Bag of options to control resource's behavior.
resource_name str
The unique name of the resource.
opts ResourceOptions
A bag of options that control this resource's behavior.
ctx Context
Context object for the current deployment.
name string
The unique name of the resource.
args ClusterArgs
The arguments to resource properties.
opts ResourceOption
Bag of options to control resource's behavior.
name string
The unique name of the resource.
args ClusterArgs
The arguments to resource properties.
opts CustomResourceOptions
Bag of options to control resource's behavior.

Cluster Resource Properties

To learn more about resource properties and how to use them, see Inputs and Outputs in the Programming Model docs.

Inputs

The Cluster resource accepts the following input properties:

ReleaseLabel string

The release label for the Amazon EMR release

ServiceRole string

IAM role that will be assumed by the Amazon EMR service to access AWS resources

AdditionalInfo string

A JSON string for selecting additional features such as adding proxy information. Note: Currently there is no API to retrieve the value of this argument after EMR cluster creation from provider, therefore this provider cannot detect drift from the actual EMR cluster if its value is changed outside this provider.

Applications List<string>

A list of applications for the cluster. Valid values are: Flink, Hadoop, Hive, Mahout, Pig, Spark, and JupyterHub (as of EMR 5.14.0). Case insensitive

AutoscalingRole string

An IAM role for automatic scaling policies. The IAM role provides permissions that the automatic scaling feature requires to launch and terminate EC2 instances in an instance group.

BootstrapActions List<ClusterBootstrapActionArgs>

Ordered list of bootstrap actions that will be run before Hadoop is started on the cluster nodes. Defined below.

Configurations string

List of configurations supplied for the EMR cluster you are creating

ConfigurationsJson string

A JSON string for supplying list of configurations for the EMR cluster.

CoreInstanceCount int

Use the core_instance_group configuration block instance_count argument instead. Number of Amazon EC2 instances used to execute the job flow. EMR will use one node as the cluster’s master node and use the remainder of the nodes (core_instance_count-1) as core nodes. Cannot be specified if core_instance_group or instance_group configuration blocks are set. Default 1

Deprecated: use core_instance_group configuration block instance_count argument instead

CoreInstanceGroup ClusterCoreInstanceGroupArgs

Configuration block to use an Instance Group for the core node type. Cannot be specified if core_instance_count argument, core_instance_type argument, or instance_group configuration blocks are set. Detailed below.

CoreInstanceType string

Use the core_instance_group configuration block instance_type argument instead. The EC2 instance type of the slave nodes. Cannot be specified if core_instance_group or instance_group configuration blocks are set.

Deprecated: use core_instance_group configuration block instance_type argument instead

CustomAmiId string

A custom Amazon Linux AMI for the cluster (instead of an EMR-owned AMI). Available in Amazon EMR version 5.7.0 and later.

EbsRootVolumeSize int

Size in GiB of the EBS root device volume of the Linux AMI that is used for each EC2 instance. Available in Amazon EMR version 4.x and later.

Ec2Attributes ClusterEc2AttributesArgs

Attributes for the EC2 instances running the job flow. Defined below

InstanceGroups List<ClusterInstanceGroupArgs>

Use the master_instance_group configuration block, core_instance_group configuration block and aws.emr.InstanceGroup resource(s) instead. A list of instance_group objects for each instance group in the cluster. Exactly one of master_instance_type and instance_group must be specified. If instance_group is set, then it must contain a configuration block for at least the MASTER instance group type (as well as any additional instance groups). Cannot be specified if master_instance_group or core_instance_group configuration blocks are set. Defined below

Deprecated: use master_instance_group configuration block, core_instance_group configuration block, and aws_emr_instance_group resource(s) instead

KeepJobFlowAliveWhenNoSteps bool

Whether to keep the cluster running when there are no steps or when all steps are complete (default is on)

KerberosAttributes ClusterKerberosAttributesArgs

Kerberos configuration for the cluster. Defined below

LogUri string

S3 bucket to write the log files of the job flow. If a value is not provided, logs are not created

MasterInstanceGroup ClusterMasterInstanceGroupArgs

Configuration block to use an Instance Group for the master node type. Cannot be specified if master_instance_type argument or instance_group configuration blocks are set. Detailed below.

MasterInstanceType string

Use the master_instance_group configuration block instance_type argument instead. The EC2 instance type of the master node. Cannot be specified if master_instance_group or instance_group configuration blocks are set.

Deprecated: use master_instance_group configuration block instance_type argument instead

Name string

The name of the job flow.

ScaleDownBehavior string

The way that individual Amazon EC2 instances terminate when an automatic scale-in activity occurs or an instance group is resized.

SecurityConfiguration string

The security configuration name to attach to the EMR cluster. Only valid for EMR clusters with release_label 4.8.0 or greater

StepConcurrencyLevel int

The number of steps that can be executed concurrently. You can specify a maximum of 256 steps. Only valid for EMR clusters with release_label 5.28.0 or greater. (default is 1)

Steps List<ClusterStepArgs>

List of steps to run when creating the cluster. Defined below. It is highly recommended to utilize ignoreChanges if other steps are being managed outside of this provider.

Tags Dictionary<string, string>

A map of tags to apply to the EMR Cluster

TerminationProtection bool

Switch on/off termination protection (default is false, except when using multiple master nodes). Before attempting to destroy the resource when termination protection is enabled, this configuration must be applied with its value set to false.

VisibleToAllUsers bool

Whether the job flow is visible to all IAM users of the AWS account associated with the job flow. Default true

ReleaseLabel string

The release label for the Amazon EMR release

ServiceRole string

IAM role that will be assumed by the Amazon EMR service to access AWS resources

AdditionalInfo string

A JSON string for selecting additional features such as adding proxy information. Note: Currently there is no API to retrieve the value of this argument after EMR cluster creation from provider, therefore this provider cannot detect drift from the actual EMR cluster if its value is changed outside this provider.

Applications []string

A list of applications for the cluster. Valid values are: Flink, Hadoop, Hive, Mahout, Pig, Spark, and JupyterHub (as of EMR 5.14.0). Case insensitive

AutoscalingRole string

An IAM role for automatic scaling policies. The IAM role provides permissions that the automatic scaling feature requires to launch and terminate EC2 instances in an instance group.

BootstrapActions []ClusterBootstrapAction

Ordered list of bootstrap actions that will be run before Hadoop is started on the cluster nodes. Defined below.

Configurations string

List of configurations supplied for the EMR cluster you are creating

ConfigurationsJson string

A JSON string for supplying list of configurations for the EMR cluster.

CoreInstanceCount int

Use the core_instance_group configuration block instance_count argument instead. Number of Amazon EC2 instances used to execute the job flow. EMR will use one node as the cluster’s master node and use the remainder of the nodes (core_instance_count-1) as core nodes. Cannot be specified if core_instance_group or instance_group configuration blocks are set. Default 1

Deprecated: use core_instance_group configuration block instance_count argument instead

CoreInstanceGroup ClusterCoreInstanceGroup

Configuration block to use an Instance Group for the core node type. Cannot be specified if core_instance_count argument, core_instance_type argument, or instance_group configuration blocks are set. Detailed below.

CoreInstanceType string

Use the core_instance_group configuration block instance_type argument instead. The EC2 instance type of the slave nodes. Cannot be specified if core_instance_group or instance_group configuration blocks are set.

Deprecated: use core_instance_group configuration block instance_type argument instead

CustomAmiId string

A custom Amazon Linux AMI for the cluster (instead of an EMR-owned AMI). Available in Amazon EMR version 5.7.0 and later.

EbsRootVolumeSize int

Size in GiB of the EBS root device volume of the Linux AMI that is used for each EC2 instance. Available in Amazon EMR version 4.x and later.

Ec2Attributes ClusterEc2Attributes

Attributes for the EC2 instances running the job flow. Defined below

InstanceGroups []ClusterInstanceGroup

Use the master_instance_group configuration block, core_instance_group configuration block and aws.emr.InstanceGroup resource(s) instead. A list of instance_group objects for each instance group in the cluster. Exactly one of master_instance_type and instance_group must be specified. If instance_group is set, then it must contain a configuration block for at least the MASTER instance group type (as well as any additional instance groups). Cannot be specified if master_instance_group or core_instance_group configuration blocks are set. Defined below

Deprecated: use master_instance_group configuration block, core_instance_group configuration block, and aws_emr_instance_group resource(s) instead

KeepJobFlowAliveWhenNoSteps bool

Whether to keep the cluster running when there are no steps or when all steps are complete (default is on)

KerberosAttributes ClusterKerberosAttributes

Kerberos configuration for the cluster. Defined below

LogUri string

S3 bucket to write the log files of the job flow. If a value is not provided, logs are not created

MasterInstanceGroup ClusterMasterInstanceGroup

Configuration block to use an Instance Group for the master node type. Cannot be specified if master_instance_type argument or instance_group configuration blocks are set. Detailed below.

MasterInstanceType string

Use the master_instance_group configuration block instance_type argument instead. The EC2 instance type of the master node. Cannot be specified if master_instance_group or instance_group configuration blocks are set.

Deprecated: use master_instance_group configuration block instance_type argument instead

Name string

The name of the job flow.

ScaleDownBehavior string

The way that individual Amazon EC2 instances terminate when an automatic scale-in activity occurs or an instance group is resized.

SecurityConfiguration string

The security configuration name to attach to the EMR cluster. Only valid for EMR clusters with release_label 4.8.0 or greater

StepConcurrencyLevel int

The number of steps that can be executed concurrently. You can specify a maximum of 256 steps. Only valid for EMR clusters with release_label 5.28.0 or greater. (default is 1)

Steps []ClusterStep

List of steps to run when creating the cluster. Defined below. It is highly recommended to utilize ignoreChanges if other steps are being managed outside of this provider.

Tags map[string]string

A map of tags to apply to the EMR Cluster

TerminationProtection bool

Switch on/off termination protection (default is false, except when using multiple master nodes). Before attempting to destroy the resource when termination protection is enabled, this configuration must be applied with its value set to false.

VisibleToAllUsers bool

Whether the job flow is visible to all IAM users of the AWS account associated with the job flow. Default true

releaseLabel string

The release label for the Amazon EMR release

serviceRole string

IAM role that will be assumed by the Amazon EMR service to access AWS resources

additionalInfo string

A JSON string for selecting additional features such as adding proxy information. Note: Currently there is no API to retrieve the value of this argument after EMR cluster creation from provider, therefore this provider cannot detect drift from the actual EMR cluster if its value is changed outside this provider.

applications string[]

A list of applications for the cluster. Valid values are: Flink, Hadoop, Hive, Mahout, Pig, Spark, and JupyterHub (as of EMR 5.14.0). Case insensitive

autoscalingRole string

An IAM role for automatic scaling policies. The IAM role provides permissions that the automatic scaling feature requires to launch and terminate EC2 instances in an instance group.

bootstrapActions ClusterBootstrapAction[]

Ordered list of bootstrap actions that will be run before Hadoop is started on the cluster nodes. Defined below.

configurations string

List of configurations supplied for the EMR cluster you are creating

configurationsJson string

A JSON string for supplying list of configurations for the EMR cluster.

coreInstanceCount number

Use the core_instance_group configuration block instance_count argument instead. Number of Amazon EC2 instances used to execute the job flow. EMR will use one node as the cluster’s master node and use the remainder of the nodes (core_instance_count-1) as core nodes. Cannot be specified if core_instance_group or instance_group configuration blocks are set. Default 1

Deprecated: use core_instance_group configuration block instance_count argument instead

coreInstanceGroup ClusterCoreInstanceGroup

Configuration block to use an Instance Group for the core node type. Cannot be specified if core_instance_count argument, core_instance_type argument, or instance_group configuration blocks are set. Detailed below.

coreInstanceType string

Use the core_instance_group configuration block instance_type argument instead. The EC2 instance type of the slave nodes. Cannot be specified if core_instance_group or instance_group configuration blocks are set.

Deprecated: use core_instance_group configuration block instance_type argument instead

customAmiId string

A custom Amazon Linux AMI for the cluster (instead of an EMR-owned AMI). Available in Amazon EMR version 5.7.0 and later.

ebsRootVolumeSize number

Size in GiB of the EBS root device volume of the Linux AMI that is used for each EC2 instance. Available in Amazon EMR version 4.x and later.

ec2Attributes ClusterEc2Attributes

Attributes for the EC2 instances running the job flow. Defined below

instanceGroups ClusterInstanceGroup[]

Use the master_instance_group configuration block, core_instance_group configuration block and aws.emr.InstanceGroup resource(s) instead. A list of instance_group objects for each instance group in the cluster. Exactly one of master_instance_type and instance_group must be specified. If instance_group is set, then it must contain a configuration block for at least the MASTER instance group type (as well as any additional instance groups). Cannot be specified if master_instance_group or core_instance_group configuration blocks are set. Defined below

Deprecated: use master_instance_group configuration block, core_instance_group configuration block, and aws_emr_instance_group resource(s) instead

keepJobFlowAliveWhenNoSteps boolean

Whether to keep the cluster running when it has no steps or when all steps are complete (default is on).

kerberosAttributes ClusterKerberosAttributes

Kerberos configuration for the cluster. Defined below

logUri string

S3 bucket to write the log files of the job flow. If a value is not provided, logs are not created

masterInstanceGroup ClusterMasterInstanceGroup

Configuration block to use an Instance Group for the master node type. Cannot be specified if master_instance_type argument or instance_group configuration blocks are set. Detailed below.

masterInstanceType string

Use the master_instance_group configuration block instance_type argument instead. The EC2 instance type of the master node. Cannot be specified if master_instance_group or instance_group configuration blocks are set.

Deprecated: use master_instance_group configuration block instance_type argument instead

name string

The name of the job flow (the EMR cluster).

scaleDownBehavior string

The way that individual Amazon EC2 instances terminate when an automatic scale-in activity occurs or an instance group is resized.

securityConfiguration string

The security configuration name to attach to the EMR cluster. Only valid for EMR clusters with release_label 4.8.0 or greater

stepConcurrencyLevel number

The number of steps that can be executed concurrently. You can specify a maximum of 256 steps. Only valid for EMR clusters with release_label 5.28.0 or greater. (default is 1)

steps ClusterStep[]

List of steps to run when creating the cluster. Defined below. It is highly recommended to utilize ignoreChanges if other steps are being managed outside of this provider.

tags {[key: string]: string}

Key-value map of tags to apply to the EMR cluster.

terminationProtection boolean

Switch on/off termination protection (default is false, except when using multiple master nodes). Before attempting to destroy the resource when termination protection is enabled, this configuration must be applied with its value set to false.

visibleToAllUsers boolean

Whether the job flow is visible to all IAM users of the AWS account associated with the job flow. Default true

release_label str

The release label for the Amazon EMR release

service_role str

IAM role that will be assumed by the Amazon EMR service to access AWS resources

additional_info str

A JSON string for selecting additional features such as adding proxy information. Note: Currently there is no API to retrieve the value of this argument after EMR cluster creation from provider, therefore this provider cannot detect drift from the actual EMR cluster if its value is changed outside this provider.

applications List[str]

A list of applications for the cluster. Valid values are: Flink, Hadoop, Hive, Mahout, Pig, Spark, and JupyterHub (as of EMR 5.14.0). Case insensitive

autoscaling_role str

An IAM role for automatic scaling policies. The IAM role provides permissions that the automatic scaling feature requires to launch and terminate EC2 instances in an instance group.

bootstrap_actions List[ClusterBootstrapAction]

Ordered list of bootstrap actions that will be run before Hadoop is started on the cluster nodes. Defined below.

configurations str

List of configurations supplied for the EMR cluster you are creating

configurations_json str

A JSON string for supplying list of configurations for the EMR cluster.

core_instance_count float

Use the core_instance_group configuration block instance_count argument instead. Number of Amazon EC2 instances used to execute the job flow. EMR will use one node as the cluster’s master node and use the remainder of the nodes (core_instance_count-1) as core nodes. Cannot be specified if core_instance_group or instance_group configuration blocks are set. Default 1

Deprecated: use core_instance_group configuration block instance_count argument instead

core_instance_group Dict[ClusterCoreInstanceGroup]

Configuration block to use an Instance Group for the core node type. Cannot be specified if core_instance_count argument, core_instance_type argument, or instance_group configuration blocks are set. Detailed below.

core_instance_type str

Use the core_instance_group configuration block instance_type argument instead. The EC2 instance type of the slave nodes. Cannot be specified if core_instance_group or instance_group configuration blocks are set.

Deprecated: use core_instance_group configuration block instance_type argument instead

custom_ami_id str

A custom Amazon Linux AMI for the cluster (instead of an EMR-owned AMI). Available in Amazon EMR version 5.7.0 and later.

ebs_root_volume_size float

Size in GiB of the EBS root device volume of the Linux AMI that is used for each EC2 instance. Available in Amazon EMR version 4.x and later.

ec2_attributes Dict[ClusterEc2Attributes]

Attributes for the EC2 instances running the job flow. Defined below

instance_groups List[ClusterInstanceGroup]

Use the master_instance_group configuration block, core_instance_group configuration block and aws.emr.InstanceGroup resource(s) instead. A list of instance_group objects for each instance group in the cluster. Exactly one of master_instance_type and instance_group must be specified. If instance_group is set, then it must contain a configuration block for at least the MASTER instance group type (as well as any additional instance groups). Cannot be specified if master_instance_group or core_instance_group configuration blocks are set. Defined below

Deprecated: use master_instance_group configuration block, core_instance_group configuration block, and aws_emr_instance_group resource(s) instead

keep_job_flow_alive_when_no_steps bool

Whether to keep the cluster running when it has no steps or when all steps are complete (default is on).

kerberos_attributes Dict[ClusterKerberosAttributes]

Kerberos configuration for the cluster. Defined below

log_uri str

S3 bucket to write the log files of the job flow. If a value is not provided, logs are not created

master_instance_group Dict[ClusterMasterInstanceGroup]

Configuration block to use an Instance Group for the master node type. Cannot be specified if master_instance_type argument or instance_group configuration blocks are set. Detailed below.

master_instance_type str

Use the master_instance_group configuration block instance_type argument instead. The EC2 instance type of the master node. Cannot be specified if master_instance_group or instance_group configuration blocks are set.

Deprecated: use master_instance_group configuration block instance_type argument instead

name str

The name of the job flow (the EMR cluster).

scale_down_behavior str

The way that individual Amazon EC2 instances terminate when an automatic scale-in activity occurs or an instance group is resized.

security_configuration str

The security configuration name to attach to the EMR cluster. Only valid for EMR clusters with release_label 4.8.0 or greater

step_concurrency_level float

The number of steps that can be executed concurrently. You can specify a maximum of 256 steps. Only valid for EMR clusters with release_label 5.28.0 or greater. (default is 1)

steps List[ClusterStep]

List of steps to run when creating the cluster. Defined below. It is highly recommended to utilize ignoreChanges if other steps are being managed outside of this provider.

tags Dict[str, str]

Key-value map of tags to apply to the EMR cluster.

termination_protection bool

Switch on/off termination protection (default is false, except when using multiple master nodes). Before attempting to destroy the resource when termination protection is enabled, this configuration must be applied with its value set to false.

visible_to_all_users bool

Whether the job flow is visible to all IAM users of the AWS account associated with the job flow. Default true

Outputs

All input properties are implicitly available as output properties. Additionally, the Cluster resource produces the following output properties:

Arn string
Id string
The provider-assigned unique ID for this managed resource.
MasterPublicDns string

The public DNS name of the master EC2 instance. Also exported: core_instance_group.0.id - the Core node type Instance Group ID, if using an Instance Group for this node type.

State string
Arn string
ClusterState string
Id string
The provider-assigned unique ID for this managed resource.
MasterPublicDns string

The public DNS name of the master EC2 instance. Also exported: core_instance_group.0.id - the Core node type Instance Group ID, if using an Instance Group for this node type.

arn string
clusterState string
id string
The provider-assigned unique ID for this managed resource.
masterPublicDns string

The public DNS name of the master EC2 instance. Also exported: core_instance_group.0.id - the Core node type Instance Group ID, if using an Instance Group for this node type.

arn str
cluster_state str
id str
The provider-assigned unique ID for this managed resource.
master_public_dns str

The public DNS name of the master EC2 instance. Also exported: core_instance_group.0.id - the Core node type Instance Group ID, if using an Instance Group for this node type.

Look up an Existing Cluster Resource

Get an existing Cluster resource’s state with the given name, ID, and optional extra properties used to qualify the lookup.

public static get(name: string, id: Input<ID>, state?: ClusterState, opts?: CustomResourceOptions): Cluster
static get(resource_name, id, opts=None, additional_info=None, applications=None, arn=None, autoscaling_role=None, bootstrap_actions=None, cluster_state=None, configurations=None, configurations_json=None, core_instance_count=None, core_instance_group=None, core_instance_type=None, custom_ami_id=None, ebs_root_volume_size=None, ec2_attributes=None, instance_groups=None, keep_job_flow_alive_when_no_steps=None, kerberos_attributes=None, log_uri=None, master_instance_group=None, master_instance_type=None, master_public_dns=None, name=None, release_label=None, scale_down_behavior=None, security_configuration=None, service_role=None, step_concurrency_level=None, steps=None, tags=None, termination_protection=None, visible_to_all_users=None, __props__=None);
func GetCluster(ctx *Context, name string, id IDInput, state *ClusterState, opts ...ResourceOption) (*Cluster, error)
public static Cluster Get(string name, Input<string> id, ClusterState? state, CustomResourceOptions? opts = null)
name
The unique name of the resulting resource.
id
The unique provider ID of the resource to lookup.
state
Any extra arguments used during the lookup.
opts
A bag of options that control this resource's behavior.
resource_name
The unique name of the resulting resource.
id
The unique provider ID of the resource to lookup.
name
The unique name of the resulting resource.
id
The unique provider ID of the resource to lookup.
state
Any extra arguments used during the lookup.
opts
A bag of options that control this resource's behavior.
name
The unique name of the resulting resource.
id
The unique provider ID of the resource to lookup.
state
Any extra arguments used during the lookup.
opts
A bag of options that control this resource's behavior.

The following state arguments are supported:

AdditionalInfo string

A JSON string for selecting additional features such as adding proxy information. Note: Currently there is no API to retrieve the value of this argument after EMR cluster creation from provider, therefore this provider cannot detect drift from the actual EMR cluster if its value is changed outside this provider.

Applications List<string>

A list of applications for the cluster. Valid values are: Flink, Hadoop, Hive, Mahout, Pig, Spark, and JupyterHub (as of EMR 5.14.0). Case insensitive

Arn string
AutoscalingRole string

An IAM role for automatic scaling policies. The IAM role provides permissions that the automatic scaling feature requires to launch and terminate EC2 instances in an instance group.

BootstrapActions List<ClusterBootstrapActionArgs>

Ordered list of bootstrap actions that will be run before Hadoop is started on the cluster nodes. Defined below.

Configurations string

List of configurations supplied for the EMR cluster you are creating

ConfigurationsJson string

A JSON string for supplying list of configurations for the EMR cluster.

CoreInstanceCount int

Use the core_instance_group configuration block instance_count argument instead. Number of Amazon EC2 instances used to execute the job flow. EMR will use one node as the cluster’s master node and use the remainder of the nodes (core_instance_count-1) as core nodes. Cannot be specified if core_instance_group or instance_group configuration blocks are set. Default 1

Deprecated: use core_instance_group configuration block instance_count argument instead

CoreInstanceGroup ClusterCoreInstanceGroupArgs

Configuration block to use an Instance Group for the core node type. Cannot be specified if core_instance_count argument, core_instance_type argument, or instance_group configuration blocks are set. Detailed below.

CoreInstanceType string

Use the core_instance_group configuration block instance_type argument instead. The EC2 instance type of the slave nodes. Cannot be specified if core_instance_group or instance_group configuration blocks are set.

Deprecated: use core_instance_group configuration block instance_type argument instead

CustomAmiId string

A custom Amazon Linux AMI for the cluster (instead of an EMR-owned AMI). Available in Amazon EMR version 5.7.0 and later.

EbsRootVolumeSize int

Size in GiB of the EBS root device volume of the Linux AMI that is used for each EC2 instance. Available in Amazon EMR version 4.x and later.

Ec2Attributes ClusterEc2AttributesArgs

Attributes for the EC2 instances running the job flow. Defined below

InstanceGroups List<ClusterInstanceGroupArgs>

Use the master_instance_group configuration block, core_instance_group configuration block and aws.emr.InstanceGroup resource(s) instead. A list of instance_group objects for each instance group in the cluster. Exactly one of master_instance_type and instance_group must be specified. If instance_group is set, then it must contain a configuration block for at least the MASTER instance group type (as well as any additional instance groups). Cannot be specified if master_instance_group or core_instance_group configuration blocks are set. Defined below

Deprecated: use master_instance_group configuration block, core_instance_group configuration block, and aws_emr_instance_group resource(s) instead

KeepJobFlowAliveWhenNoSteps bool

Whether to keep the cluster running when it has no steps or when all steps are complete (default is on).

KerberosAttributes ClusterKerberosAttributesArgs

Kerberos configuration for the cluster. Defined below

LogUri string

S3 bucket to write the log files of the job flow. If a value is not provided, logs are not created

MasterInstanceGroup ClusterMasterInstanceGroupArgs

Configuration block to use an Instance Group for the master node type. Cannot be specified if master_instance_type argument or instance_group configuration blocks are set. Detailed below.

MasterInstanceType string

Use the master_instance_group configuration block instance_type argument instead. The EC2 instance type of the master node. Cannot be specified if master_instance_group or instance_group configuration blocks are set.

Deprecated: use master_instance_group configuration block instance_type argument instead

MasterPublicDns string

The public DNS name of the master EC2 instance. Also exported: core_instance_group.0.id - the Core node type Instance Group ID, if using an Instance Group for this node type.

Name string

The name of the job flow (the EMR cluster).

ReleaseLabel string

The release label for the Amazon EMR release

ScaleDownBehavior string

The way that individual Amazon EC2 instances terminate when an automatic scale-in activity occurs or an instance group is resized.

SecurityConfiguration string

The security configuration name to attach to the EMR cluster. Only valid for EMR clusters with release_label 4.8.0 or greater

ServiceRole string

IAM role that will be assumed by the Amazon EMR service to access AWS resources

State string
StepConcurrencyLevel int

The number of steps that can be executed concurrently. You can specify a maximum of 256 steps. Only valid for EMR clusters with release_label 5.28.0 or greater. (default is 1)

Steps List<ClusterStepArgs>

List of steps to run when creating the cluster. Defined below. It is highly recommended to utilize ignoreChanges if other steps are being managed outside of this provider.

Tags Dictionary<string, string>

Key-value map of tags to apply to the EMR cluster.

TerminationProtection bool

Switch on/off termination protection (default is false, except when using multiple master nodes). Before attempting to destroy the resource when termination protection is enabled, this configuration must be applied with its value set to false.

VisibleToAllUsers bool

Whether the job flow is visible to all IAM users of the AWS account associated with the job flow. Default true

AdditionalInfo string

A JSON string for selecting additional features such as adding proxy information. Note: Currently there is no API to retrieve the value of this argument after EMR cluster creation from provider, therefore this provider cannot detect drift from the actual EMR cluster if its value is changed outside this provider.

Applications []string

A list of applications for the cluster. Valid values are: Flink, Hadoop, Hive, Mahout, Pig, Spark, and JupyterHub (as of EMR 5.14.0). Case insensitive

Arn string
AutoscalingRole string

An IAM role for automatic scaling policies. The IAM role provides permissions that the automatic scaling feature requires to launch and terminate EC2 instances in an instance group.

BootstrapActions []ClusterBootstrapAction

Ordered list of bootstrap actions that will be run before Hadoop is started on the cluster nodes. Defined below.

ClusterState string
Configurations string

List of configurations supplied for the EMR cluster you are creating

ConfigurationsJson string

A JSON string for supplying list of configurations for the EMR cluster.

CoreInstanceCount int

Use the core_instance_group configuration block instance_count argument instead. Number of Amazon EC2 instances used to execute the job flow. EMR will use one node as the cluster’s master node and use the remainder of the nodes (core_instance_count-1) as core nodes. Cannot be specified if core_instance_group or instance_group configuration blocks are set. Default 1

Deprecated: use core_instance_group configuration block instance_count argument instead

CoreInstanceGroup ClusterCoreInstanceGroup

Configuration block to use an Instance Group for the core node type. Cannot be specified if core_instance_count argument, core_instance_type argument, or instance_group configuration blocks are set. Detailed below.

CoreInstanceType string

Use the core_instance_group configuration block instance_type argument instead. The EC2 instance type of the slave nodes. Cannot be specified if core_instance_group or instance_group configuration blocks are set.

Deprecated: use core_instance_group configuration block instance_type argument instead

CustomAmiId string

A custom Amazon Linux AMI for the cluster (instead of an EMR-owned AMI). Available in Amazon EMR version 5.7.0 and later.

EbsRootVolumeSize int

Size in GiB of the EBS root device volume of the Linux AMI that is used for each EC2 instance. Available in Amazon EMR version 4.x and later.

Ec2Attributes ClusterEc2Attributes

Attributes for the EC2 instances running the job flow. Defined below

InstanceGroups []ClusterInstanceGroup

Use the master_instance_group configuration block, core_instance_group configuration block and aws.emr.InstanceGroup resource(s) instead. A list of instance_group objects for each instance group in the cluster. Exactly one of master_instance_type and instance_group must be specified. If instance_group is set, then it must contain a configuration block for at least the MASTER instance group type (as well as any additional instance groups). Cannot be specified if master_instance_group or core_instance_group configuration blocks are set. Defined below

Deprecated: use master_instance_group configuration block, core_instance_group configuration block, and aws_emr_instance_group resource(s) instead

KeepJobFlowAliveWhenNoSteps bool

Whether to keep the cluster running when it has no steps or when all steps are complete (default is on).

KerberosAttributes ClusterKerberosAttributes

Kerberos configuration for the cluster. Defined below

LogUri string

S3 bucket to write the log files of the job flow. If a value is not provided, logs are not created

MasterInstanceGroup ClusterMasterInstanceGroup

Configuration block to use an Instance Group for the master node type. Cannot be specified if master_instance_type argument or instance_group configuration blocks are set. Detailed below.

MasterInstanceType string

Use the master_instance_group configuration block instance_type argument instead. The EC2 instance type of the master node. Cannot be specified if master_instance_group or instance_group configuration blocks are set.

Deprecated: use master_instance_group configuration block instance_type argument instead

MasterPublicDns string

The public DNS name of the master EC2 instance. Also exported: core_instance_group.0.id - the Core node type Instance Group ID, if using an Instance Group for this node type.

Name string

The name of the job flow (the EMR cluster).

ReleaseLabel string

The release label for the Amazon EMR release

ScaleDownBehavior string

The way that individual Amazon EC2 instances terminate when an automatic scale-in activity occurs or an instance group is resized.

SecurityConfiguration string

The security configuration name to attach to the EMR cluster. Only valid for EMR clusters with release_label 4.8.0 or greater

ServiceRole string

IAM role that will be assumed by the Amazon EMR service to access AWS resources

StepConcurrencyLevel int

The number of steps that can be executed concurrently. You can specify a maximum of 256 steps. Only valid for EMR clusters with release_label 5.28.0 or greater. (default is 1)

Steps []ClusterStep

List of steps to run when creating the cluster. Defined below. It is highly recommended to utilize ignoreChanges if other steps are being managed outside of this provider.

Tags map[string]string

Key-value map of tags to apply to the EMR cluster.

TerminationProtection bool

Switch on/off termination protection (default is false, except when using multiple master nodes). Before attempting to destroy the resource when termination protection is enabled, this configuration must be applied with its value set to false.

VisibleToAllUsers bool

Whether the job flow is visible to all IAM users of the AWS account associated with the job flow. Default true

additionalInfo string

A JSON string for selecting additional features such as adding proxy information. Note: Currently there is no API to retrieve the value of this argument after EMR cluster creation from provider, therefore this provider cannot detect drift from the actual EMR cluster if its value is changed outside this provider.

applications string[]

A list of applications for the cluster. Valid values are: Flink, Hadoop, Hive, Mahout, Pig, Spark, and JupyterHub (as of EMR 5.14.0). Case insensitive

arn string
autoscalingRole string

An IAM role for automatic scaling policies. The IAM role provides permissions that the automatic scaling feature requires to launch and terminate EC2 instances in an instance group.

bootstrapActions ClusterBootstrapAction[]

Ordered list of bootstrap actions that will be run before Hadoop is started on the cluster nodes. Defined below.

clusterState string
configurations string

List of configurations supplied for the EMR cluster you are creating

configurationsJson string

A JSON string for supplying list of configurations for the EMR cluster.

coreInstanceCount number

Use the core_instance_group configuration block instance_count argument instead. Number of Amazon EC2 instances used to execute the job flow. EMR will use one node as the cluster’s master node and use the remainder of the nodes (core_instance_count-1) as core nodes. Cannot be specified if core_instance_group or instance_group configuration blocks are set. Default 1

Deprecated: use core_instance_group configuration block instance_count argument instead

coreInstanceGroup ClusterCoreInstanceGroup

Configuration block to use an Instance Group for the core node type. Cannot be specified if core_instance_count argument, core_instance_type argument, or instance_group configuration blocks are set. Detailed below.

coreInstanceType string

Use the core_instance_group configuration block instance_type argument instead. The EC2 instance type of the slave nodes. Cannot be specified if core_instance_group or instance_group configuration blocks are set.

Deprecated: use core_instance_group configuration block instance_type argument instead

customAmiId string

A custom Amazon Linux AMI for the cluster (instead of an EMR-owned AMI). Available in Amazon EMR version 5.7.0 and later.

ebsRootVolumeSize number

Size in GiB of the EBS root device volume of the Linux AMI that is used for each EC2 instance. Available in Amazon EMR version 4.x and later.

ec2Attributes ClusterEc2Attributes

Attributes for the EC2 instances running the job flow. Defined below

instanceGroups ClusterInstanceGroup[]

Use the master_instance_group configuration block, core_instance_group configuration block and aws.emr.InstanceGroup resource(s) instead. A list of instance_group objects for each instance group in the cluster. Exactly one of master_instance_type and instance_group must be specified. If instance_group is set, then it must contain a configuration block for at least the MASTER instance group type (as well as any additional instance groups). Cannot be specified if master_instance_group or core_instance_group configuration blocks are set. Defined below

Deprecated: use master_instance_group configuration block, core_instance_group configuration block, and aws_emr_instance_group resource(s) instead

keepJobFlowAliveWhenNoSteps boolean

Whether to keep the cluster running when it has no steps or when all steps are complete (default is on).

kerberosAttributes ClusterKerberosAttributes

Kerberos configuration for the cluster. Defined below

logUri string

S3 bucket to write the log files of the job flow. If a value is not provided, logs are not created

masterInstanceGroup ClusterMasterInstanceGroup

Configuration block to use an Instance Group for the master node type. Cannot be specified if master_instance_type argument or instance_group configuration blocks are set. Detailed below.

masterInstanceType string

Use the master_instance_group configuration block instance_type argument instead. The EC2 instance type of the master node. Cannot be specified if master_instance_group or instance_group configuration blocks are set.

Deprecated: use master_instance_group configuration block instance_type argument instead

masterPublicDns string

The public DNS name of the master EC2 instance. Also exported: core_instance_group.0.id - the Core node type Instance Group ID, if using an Instance Group for this node type.

name string

The name of the job flow (the EMR cluster).

releaseLabel string

The release label for the Amazon EMR release

scaleDownBehavior string

The way that individual Amazon EC2 instances terminate when an automatic scale-in activity occurs or an instance group is resized.

securityConfiguration string

The security configuration name to attach to the EMR cluster. Only valid for EMR clusters with release_label 4.8.0 or greater

serviceRole string

IAM role that will be assumed by the Amazon EMR service to access AWS resources

stepConcurrencyLevel number

The number of steps that can be executed concurrently. You can specify a maximum of 256 steps. Only valid for EMR clusters with release_label 5.28.0 or greater. (default is 1)

steps ClusterStep[]

List of steps to run when creating the cluster. Defined below. It is highly recommended to utilize ignoreChanges if other steps are being managed outside of this provider.

tags {[key: string]: string}

Key-value map of tags to apply to the EMR cluster.

terminationProtection boolean

Switch on/off termination protection (default is false, except when using multiple master nodes). Before attempting to destroy the resource when termination protection is enabled, this configuration must be applied with its value set to false.

visibleToAllUsers boolean

Whether the job flow is visible to all IAM users of the AWS account associated with the job flow. Default true

additional_info str

A JSON string for selecting additional features such as adding proxy information. Note: Currently there is no API to retrieve the value of this argument after EMR cluster creation from provider, therefore this provider cannot detect drift from the actual EMR cluster if its value is changed outside this provider.

applications List[str]

A list of applications for the cluster. Valid values are: Flink, Hadoop, Hive, Mahout, Pig, Spark, and JupyterHub (as of EMR 5.14.0). Case insensitive

arn str
autoscaling_role str

An IAM role for automatic scaling policies. The IAM role provides permissions that the automatic scaling feature requires to launch and terminate EC2 instances in an instance group.

bootstrap_actions List[ClusterBootstrapAction]

Ordered list of bootstrap actions that will be run before Hadoop is started on the cluster nodes. Defined below.

cluster_state str
configurations str

List of configurations supplied for the EMR cluster you are creating

configurations_json str

A JSON string for supplying list of configurations for the EMR cluster.

core_instance_count float

Use the core_instance_group configuration block instance_count argument instead. Number of Amazon EC2 instances used to execute the job flow. EMR will use one node as the cluster’s master node and use the remainder of the nodes (core_instance_count-1) as core nodes. Cannot be specified if core_instance_group or instance_group configuration blocks are set. Default 1

Deprecated: use core_instance_group configuration block instance_count argument instead

core_instance_group Dict[ClusterCoreInstanceGroup]

Configuration block to use an Instance Group for the core node type. Cannot be specified if core_instance_count argument, core_instance_type argument, or instance_group configuration blocks are set. Detailed below.

core_instance_type str

Use the core_instance_group configuration block instance_type argument instead. The EC2 instance type of the slave nodes. Cannot be specified if core_instance_group or instance_group configuration blocks are set.

Deprecated: use core_instance_group configuration block instance_type argument instead

custom_ami_id str

A custom Amazon Linux AMI for the cluster (instead of an EMR-owned AMI). Available in Amazon EMR version 5.7.0 and later.

ebs_root_volume_size float

Size in GiB of the EBS root device volume of the Linux AMI that is used for each EC2 instance. Available in Amazon EMR version 4.x and later.

ec2_attributes Dict[ClusterEc2Attributes]

Attributes for the EC2 instances running the job flow. Defined below

instance_groups List[ClusterInstanceGroup]

Use the master_instance_group configuration block, core_instance_group configuration block and aws.emr.InstanceGroup resource(s) instead. A list of instance_group objects for each instance group in the cluster. Exactly one of master_instance_type and instance_group must be specified. If instance_group is set, then it must contain a configuration block for at least the MASTER instance group type (as well as any additional instance groups). Cannot be specified if master_instance_group or core_instance_group configuration blocks are set. Defined below

Deprecated: use master_instance_group configuration block, core_instance_group configuration block, and aws_emr_instance_group resource(s) instead

keep_job_flow_alive_when_no_steps bool

Switch on/off whether the cluster keeps running when there are no steps or when all steps are complete (default is on)

kerberos_attributes Dict[ClusterKerberosAttributes]

Kerberos configuration for the cluster. Defined below

log_uri str

S3 bucket to write the log files of the job flow. If a value is not provided, logs are not created

master_instance_group Dict[ClusterMasterInstanceGroup]

Configuration block to use an Instance Group for the master node type. Cannot be specified if master_instance_type argument or instance_group configuration blocks are set. Detailed below.

master_instance_type str

Use the master_instance_group configuration block instance_type argument instead. The EC2 instance type of the master node. Cannot be specified if master_instance_group or instance_group configuration blocks are set.

Deprecated: use master_instance_group configuration block instance_type argument instead

master_public_dns str

The public DNS name of the master EC2 instance. A related exported attribute, core_instance_group.0.id, holds the Core node type Instance Group ID, if using Instance Group for this node type.

name str

The name of the job flow.

release_label str

The release label for the Amazon EMR release

scale_down_behavior str

The way that individual Amazon EC2 instances terminate when an automatic scale-in activity occurs or an instance group is resized.

security_configuration str

The security configuration name to attach to the EMR cluster. Only valid for EMR clusters with release_label 4.8.0 or greater

service_role str

IAM role that will be assumed by the Amazon EMR service to access AWS resources

step_concurrency_level float

The number of steps that can be executed concurrently. You can specify a maximum of 256 steps. Only valid for EMR clusters with release_label 5.28.0 or greater. (default is 1)

steps List[ClusterStep]

List of steps to run when creating the cluster. Defined below. It is highly recommended to utilize ignoreChanges if other steps are being managed outside of this provider.

tags Dict[str, str]

List of tags to apply to the EMR Cluster.

termination_protection bool

Switch on/off termination protection (default is false, except when using multiple master nodes). Before attempting to destroy the resource when termination protection is enabled, this configuration must be applied with its value set to false.

visible_to_all_users bool

Whether the job flow is visible to all IAM users of the AWS account associated with the job flow. Default true

Supporting Types

ClusterBootstrapAction

See the input and output API doc for this type.

See the input and output API doc for this type.

See the input and output API doc for this type.

Name string

Name of the bootstrap action.

Path string

Location of the script to run during a bootstrap action. Can be either a location in Amazon S3 or on a local file system

Args List<string>

List of command line arguments to pass to the bootstrap action script.

Name string

Name of the bootstrap action.

Path string

Location of the script to run during a bootstrap action. Can be either a location in Amazon S3 or on a local file system

Args []string

List of command line arguments to pass to the bootstrap action script.

name string

Name of the bootstrap action.

path string

Location of the script to run during a bootstrap action. Can be either a location in Amazon S3 or on a local file system

args string[]

List of command line arguments to pass to the bootstrap action script.

name str

Name of the bootstrap action.

path str

Location of the script to run during a bootstrap action. Can be either a location in Amazon S3 or on a local file system

args List[str]

List of command line arguments to pass to the bootstrap action script.

ClusterCoreInstanceGroup

See the input and output API doc for this type.

See the input and output API doc for this type.

See the input and output API doc for this type.

InstanceType string

EC2 instance type for all instances in the instance group.

AutoscalingPolicy string

The autoscaling policy document. This is a JSON formatted string. See EMR Auto Scaling

BidPrice string

Bid price for each EC2 instance in the instance group, expressed in USD. By setting this attribute, the instance group is being declared as a Spot Instance, and will implicitly create a Spot request. Leave this blank to use On-Demand Instances.

EbsConfigs List<ClusterCoreInstanceGroupEbsConfigArgs>

Configuration block(s) for EBS volumes attached to each instance in the instance group. Detailed below.

Id string

Core node type Instance Group ID, if using Instance Group for this node type.

InstanceCount int

Target number of instances for the core instance group. Must be at least 1. Defaults to 1.

Name string

Friendly name given to the instance group.

InstanceType string

EC2 instance type for all instances in the instance group.

AutoscalingPolicy string

The autoscaling policy document. This is a JSON formatted string. See EMR Auto Scaling

BidPrice string

Bid price for each EC2 instance in the instance group, expressed in USD. By setting this attribute, the instance group is being declared as a Spot Instance, and will implicitly create a Spot request. Leave this blank to use On-Demand Instances.

EbsConfigs []ClusterCoreInstanceGroupEbsConfig

Configuration block(s) for EBS volumes attached to each instance in the instance group. Detailed below.

Id string

Core node type Instance Group ID, if using Instance Group for this node type.

InstanceCount int

Target number of instances for the core instance group. Must be at least 1. Defaults to 1.

Name string

Friendly name given to the instance group.

instanceType string

EC2 instance type for all instances in the instance group.

autoscalingPolicy string

The autoscaling policy document. This is a JSON formatted string. See EMR Auto Scaling

bidPrice string

Bid price for each EC2 instance in the instance group, expressed in USD. By setting this attribute, the instance group is being declared as a Spot Instance, and will implicitly create a Spot request. Leave this blank to use On-Demand Instances.

ebsConfigs ClusterCoreInstanceGroupEbsConfig[]

Configuration block(s) for EBS volumes attached to each instance in the instance group. Detailed below.

id string

Core node type Instance Group ID, if using Instance Group for this node type.

instanceCount number

Target number of instances for the core instance group. Must be at least 1. Defaults to 1.

name string

Friendly name given to the instance group.

instance_type str

EC2 instance type for all instances in the instance group.

autoscaling_policy str

The autoscaling policy document. This is a JSON formatted string. See EMR Auto Scaling

bid_price str

Bid price for each EC2 instance in the instance group, expressed in USD. By setting this attribute, the instance group is being declared as a Spot Instance, and will implicitly create a Spot request. Leave this blank to use On-Demand Instances.

ebs_configs List[ClusterCoreInstanceGroupEbsConfig]

Configuration block(s) for EBS volumes attached to each instance in the instance group. Detailed below.

id str

Core node type Instance Group ID, if using Instance Group for this node type.

instance_count float

Target number of instances for the core instance group. Must be at least 1. Defaults to 1.

name str

Friendly name given to the instance group.

ClusterCoreInstanceGroupEbsConfig

See the input and output API doc for this type.

See the input and output API doc for this type.

See the input and output API doc for this type.

Size int

The volume size, in gibibytes (GiB).

Type string

The volume type. Valid options are gp2, io1, standard and st1. See EBS Volume Types.

Iops int

The number of I/O operations per second (IOPS) that the volume supports

VolumesPerInstance int

The number of EBS volumes with this configuration to attach to each EC2 instance in the instance group (default is 1)

Size int

The volume size, in gibibytes (GiB).

Type string

The volume type. Valid options are gp2, io1, standard and st1. See EBS Volume Types.

Iops int

The number of I/O operations per second (IOPS) that the volume supports

VolumesPerInstance int

The number of EBS volumes with this configuration to attach to each EC2 instance in the instance group (default is 1)

size number

The volume size, in gibibytes (GiB).

type string

The volume type. Valid options are gp2, io1, standard and st1. See EBS Volume Types.

iops number

The number of I/O operations per second (IOPS) that the volume supports

volumesPerInstance number

The number of EBS volumes with this configuration to attach to each EC2 instance in the instance group (default is 1)

size float

The volume size, in gibibytes (GiB).

type str

The volume type. Valid options are gp2, io1, standard and st1. See EBS Volume Types.

iops float

The number of I/O operations per second (IOPS) that the volume supports

volumesPerInstance float

The number of EBS volumes with this configuration to attach to each EC2 instance in the instance group (default is 1)

ClusterEc2Attributes

See the input and output API doc for this type.

See the input and output API doc for this type.

See the input and output API doc for this type.

InstanceProfile string

Instance Profile whose role the EC2 instances of the cluster assume

AdditionalMasterSecurityGroups string

String containing a comma separated list of additional Amazon EC2 security group IDs for the master node

AdditionalSlaveSecurityGroups string

String containing a comma separated list of additional Amazon EC2 security group IDs for the slave nodes

EmrManagedMasterSecurityGroup string

Identifier of the Amazon EC2 EMR-Managed security group for the master node

EmrManagedSlaveSecurityGroup string

Identifier of the Amazon EC2 EMR-Managed security group for the slave nodes

KeyName string

Amazon EC2 key pair that can be used to ssh to the master node as the user called hadoop

ServiceAccessSecurityGroup string

Identifier of the Amazon EC2 service-access security group - required when the cluster runs on a private subnet

SubnetId string

VPC subnet id where you want the job flow to launch. Cannot specify the cc1.4xlarge instance type for nodes of a job flow launched in an Amazon VPC

InstanceProfile string

Instance Profile whose role the EC2 instances of the cluster assume

AdditionalMasterSecurityGroups string

String containing a comma separated list of additional Amazon EC2 security group IDs for the master node

AdditionalSlaveSecurityGroups string

String containing a comma separated list of additional Amazon EC2 security group IDs for the slave nodes

EmrManagedMasterSecurityGroup string

Identifier of the Amazon EC2 EMR-Managed security group for the master node

EmrManagedSlaveSecurityGroup string

Identifier of the Amazon EC2 EMR-Managed security group for the slave nodes

KeyName string

Amazon EC2 key pair that can be used to ssh to the master node as the user called hadoop

ServiceAccessSecurityGroup string

Identifier of the Amazon EC2 service-access security group - required when the cluster runs on a private subnet

SubnetId string

VPC subnet id where you want the job flow to launch. Cannot specify the cc1.4xlarge instance type for nodes of a job flow launched in an Amazon VPC

instanceProfile string

Instance Profile whose role the EC2 instances of the cluster assume

additionalMasterSecurityGroups string

String containing a comma separated list of additional Amazon EC2 security group IDs for the master node

additionalSlaveSecurityGroups string

String containing a comma separated list of additional Amazon EC2 security group IDs for the slave nodes

emrManagedMasterSecurityGroup string

Identifier of the Amazon EC2 EMR-Managed security group for the master node

emrManagedSlaveSecurityGroup string

Identifier of the Amazon EC2 EMR-Managed security group for the slave nodes

keyName string

Amazon EC2 key pair that can be used to ssh to the master node as the user called hadoop

serviceAccessSecurityGroup string

Identifier of the Amazon EC2 service-access security group - required when the cluster runs on a private subnet

subnetId string

VPC subnet id where you want the job flow to launch. Cannot specify the cc1.4xlarge instance type for nodes of a job flow launched in an Amazon VPC

instanceProfile str

Instance Profile whose role the EC2 instances of the cluster assume

additionalMasterSecurityGroups str

String containing a comma separated list of additional Amazon EC2 security group IDs for the master node

additionalSlaveSecurityGroups str

String containing a comma separated list of additional Amazon EC2 security group IDs for the slave nodes

emrManagedMasterSecurityGroup str

Identifier of the Amazon EC2 EMR-Managed security group for the master node

emrManagedSlaveSecurityGroup str

Identifier of the Amazon EC2 EMR-Managed security group for the slave nodes

key_name str

Amazon EC2 key pair that can be used to ssh to the master node as the user called hadoop

serviceAccessSecurityGroup str

Identifier of the Amazon EC2 service-access security group - required when the cluster runs on a private subnet

subnet_id str

VPC subnet id where you want the job flow to launch. Cannot specify the cc1.4xlarge instance type for nodes of a job flow launched in an Amazon VPC

ClusterInstanceGroup

See the input and output API doc for this type.

See the input and output API doc for this type.

See the input and output API doc for this type.

InstanceRole string

The role of the instance group in the cluster. Valid values are: MASTER, CORE, and TASK.

InstanceType string

EC2 instance type for all instances in the instance group.

AutoscalingPolicy string

The autoscaling policy document. This is a JSON formatted string. See EMR Auto Scaling

BidPrice string

Bid price for each EC2 instance in the instance group, expressed in USD. By setting this attribute, the instance group is being declared as a Spot Instance, and will implicitly create a Spot request. Leave this blank to use On-Demand Instances.

EbsConfigs List<ClusterInstanceGroupEbsConfigArgs>

Configuration block(s) for EBS volumes attached to each instance in the instance group. Detailed below.

Id string

The ID of the EMR Cluster

InstanceCount int

Target number of instances for the instance group. Must be 1 or 3. Defaults to 1. Launching with multiple master nodes is only supported in EMR version 5.23.0+, and requires this resource’s core_instance_group to be configured. Public (Internet accessible) instances must be created in VPC subnets that have map public IP on launch enabled. Termination protection is automatically enabled when launched with multiple master nodes and this provider must have the termination_protection = false configuration applied before destroying this resource.

Name string

Friendly name given to the instance group.

InstanceRole string

The role of the instance group in the cluster. Valid values are: MASTER, CORE, and TASK.

InstanceType string

EC2 instance type for all instances in the instance group.

AutoscalingPolicy string

The autoscaling policy document. This is a JSON formatted string. See EMR Auto Scaling

BidPrice string

Bid price for each EC2 instance in the instance group, expressed in USD. By setting this attribute, the instance group is being declared as a Spot Instance, and will implicitly create a Spot request. Leave this blank to use On-Demand Instances.

EbsConfigs []ClusterInstanceGroupEbsConfig

Configuration block(s) for EBS volumes attached to each instance in the instance group. Detailed below.

Id string

The ID of the EMR Cluster

InstanceCount int

Target number of instances for the instance group. Must be 1 or 3. Defaults to 1. Launching with multiple master nodes is only supported in EMR version 5.23.0+, and requires this resource’s core_instance_group to be configured. Public (Internet accessible) instances must be created in VPC subnets that have map public IP on launch enabled. Termination protection is automatically enabled when launched with multiple master nodes and this provider must have the termination_protection = false configuration applied before destroying this resource.

Name string

Friendly name given to the instance group.

instanceRole string

The role of the instance group in the cluster. Valid values are: MASTER, CORE, and TASK.

instanceType string

EC2 instance type for all instances in the instance group.

autoscalingPolicy string

The autoscaling policy document. This is a JSON formatted string. See EMR Auto Scaling

bidPrice string

Bid price for each EC2 instance in the instance group, expressed in USD. By setting this attribute, the instance group is being declared as a Spot Instance, and will implicitly create a Spot request. Leave this blank to use On-Demand Instances.

ebsConfigs ClusterInstanceGroupEbsConfig[]

Configuration block(s) for EBS volumes attached to each instance in the instance group. Detailed below.

id string

The ID of the EMR Cluster

instanceCount number

Target number of instances for the instance group. Must be 1 or 3. Defaults to 1. Launching with multiple master nodes is only supported in EMR version 5.23.0+, and requires this resource’s core_instance_group to be configured. Public (Internet accessible) instances must be created in VPC subnets that have map public IP on launch enabled. Termination protection is automatically enabled when launched with multiple master nodes and this provider must have the termination_protection = false configuration applied before destroying this resource.

name string

Friendly name given to the instance group.

instanceRole str

The role of the instance group in the cluster. Valid values are: MASTER, CORE, and TASK.

instance_type str

EC2 instance type for all instances in the instance group.

autoscaling_policy str

The autoscaling policy document. This is a JSON formatted string. See EMR Auto Scaling

bid_price str

Bid price for each EC2 instance in the instance group, expressed in USD. By setting this attribute, the instance group is being declared as a Spot Instance, and will implicitly create a Spot request. Leave this blank to use On-Demand Instances.

ebs_configs List[ClusterInstanceGroupEbsConfig]

Configuration block(s) for EBS volumes attached to each instance in the instance group. Detailed below.

id str

The ID of the EMR Cluster

instance_count float

Target number of instances for the instance group. Must be 1 or 3. Defaults to 1. Launching with multiple master nodes is only supported in EMR version 5.23.0+, and requires this resource’s core_instance_group to be configured. Public (Internet accessible) instances must be created in VPC subnets that have map public IP on launch enabled. Termination protection is automatically enabled when launched with multiple master nodes and this provider must have the termination_protection = false configuration applied before destroying this resource.

name str

Friendly name given to the instance group.

ClusterInstanceGroupEbsConfig

See the input and output API doc for this type.

See the input and output API doc for this type.

See the input and output API doc for this type.

Size int

The volume size, in gibibytes (GiB).

Type string

The volume type. Valid options are gp2, io1, standard and st1. See EBS Volume Types.

Iops int

The number of I/O operations per second (IOPS) that the volume supports

VolumesPerInstance int

The number of EBS volumes with this configuration to attach to each EC2 instance in the instance group (default is 1)

Size int

The volume size, in gibibytes (GiB).

Type string

The volume type. Valid options are gp2, io1, standard and st1. See EBS Volume Types.

Iops int

The number of I/O operations per second (IOPS) that the volume supports

VolumesPerInstance int

The number of EBS volumes with this configuration to attach to each EC2 instance in the instance group (default is 1)

size number

The volume size, in gibibytes (GiB).

type string

The volume type. Valid options are gp2, io1, standard and st1. See EBS Volume Types.

iops number

The number of I/O operations per second (IOPS) that the volume supports

volumesPerInstance number

The number of EBS volumes with this configuration to attach to each EC2 instance in the instance group (default is 1)

size float

The volume size, in gibibytes (GiB).

type str

The volume type. Valid options are gp2, io1, standard and st1. See EBS Volume Types.

iops float

The number of I/O operations per second (IOPS) that the volume supports

volumesPerInstance float

The number of EBS volumes with this configuration to attach to each EC2 instance in the instance group (default is 1)

ClusterKerberosAttributes

See the input and output API doc for this type.

See the input and output API doc for this type.

See the input and output API doc for this type.

KdcAdminPassword string

The password used within the cluster for the kadmin service on the cluster-dedicated KDC, which maintains Kerberos principals, password policies, and keytabs for the cluster. This provider cannot perform drift detection of this configuration.

Realm string

The name of the Kerberos realm to which all nodes in a cluster belong. For example, EC2.INTERNAL

AdDomainJoinPassword string

The Active Directory password for ad_domain_join_user. This provider cannot perform drift detection of this configuration.

AdDomainJoinUser string

Required only when establishing a cross-realm trust with an Active Directory domain. A user with sufficient privileges to join resources to the domain. This provider cannot perform drift detection of this configuration.

CrossRealmTrustPrincipalPassword string

Required only when establishing a cross-realm trust with a KDC in a different realm. The cross-realm principal password, which must be identical across realms. This provider cannot perform drift detection of this configuration.

KdcAdminPassword string

The password used within the cluster for the kadmin service on the cluster-dedicated KDC, which maintains Kerberos principals, password policies, and keytabs for the cluster. This provider cannot perform drift detection of this configuration.

Realm string

The name of the Kerberos realm to which all nodes in a cluster belong. For example, EC2.INTERNAL

AdDomainJoinPassword string

The Active Directory password for ad_domain_join_user. This provider cannot perform drift detection of this configuration.

AdDomainJoinUser string

Required only when establishing a cross-realm trust with an Active Directory domain. A user with sufficient privileges to join resources to the domain. This provider cannot perform drift detection of this configuration.

CrossRealmTrustPrincipalPassword string

Required only when establishing a cross-realm trust with a KDC in a different realm. The cross-realm principal password, which must be identical across realms. This provider cannot perform drift detection of this configuration.

kdcAdminPassword string

The password used within the cluster for the kadmin service on the cluster-dedicated KDC, which maintains Kerberos principals, password policies, and keytabs for the cluster. This provider cannot perform drift detection of this configuration.

realm string

The name of the Kerberos realm to which all nodes in a cluster belong. For example, EC2.INTERNAL

adDomainJoinPassword string

The Active Directory password for ad_domain_join_user. This provider cannot perform drift detection of this configuration.

adDomainJoinUser string

Required only when establishing a cross-realm trust with an Active Directory domain. A user with sufficient privileges to join resources to the domain. This provider cannot perform drift detection of this configuration.

crossRealmTrustPrincipalPassword string

Required only when establishing a cross-realm trust with a KDC in a different realm. The cross-realm principal password, which must be identical across realms. This provider cannot perform drift detection of this configuration.

kdcAdminPassword str

The password used within the cluster for the kadmin service on the cluster-dedicated KDC, which maintains Kerberos principals, password policies, and keytabs for the cluster. This provider cannot perform drift detection of this configuration.

realm str

The name of the Kerberos realm to which all nodes in a cluster belong. For example, EC2.INTERNAL

adDomainJoinPassword str

The Active Directory password for ad_domain_join_user. This provider cannot perform drift detection of this configuration.

adDomainJoinUser str

Required only when establishing a cross-realm trust with an Active Directory domain. A user with sufficient privileges to join resources to the domain. This provider cannot perform drift detection of this configuration.

crossRealmTrustPrincipalPassword str

Required only when establishing a cross-realm trust with a KDC in a different realm. The cross-realm principal password, which must be identical across realms. This provider cannot perform drift detection of this configuration.

ClusterMasterInstanceGroup

See the input and output API doc for this type.

See the input and output API doc for this type.

See the input and output API doc for this type.

InstanceType string

EC2 instance type for all instances in the instance group.

BidPrice string

Bid price for each EC2 instance in the instance group, expressed in USD. By setting this attribute, the instance group is being declared as a Spot Instance, and will implicitly create a Spot request. Leave this blank to use On-Demand Instances.

EbsConfigs List<ClusterMasterInstanceGroupEbsConfigArgs>

Configuration block(s) for EBS volumes attached to each instance in the instance group. Detailed below.

Id string

The ID of the EMR Cluster

InstanceCount int

Target number of instances for the instance group. Must be 1 or 3. Defaults to 1. Launching with multiple master nodes is only supported in EMR version 5.23.0+, and requires this resource’s core_instance_group to be configured. Public (Internet accessible) instances must be created in VPC subnets that have map public IP on launch enabled. Termination protection is automatically enabled when launched with multiple master nodes and this provider must have the termination_protection = false configuration applied before destroying this resource.

Name string

Friendly name given to the instance group.

InstanceType string

EC2 instance type for all instances in the instance group.

BidPrice string

Bid price for each EC2 instance in the instance group, expressed in USD. By setting this attribute, the instance group is being declared as a Spot Instance, and will implicitly create a Spot request. Leave this blank to use On-Demand Instances.

EbsConfigs []ClusterMasterInstanceGroupEbsConfig

Configuration block(s) for EBS volumes attached to each instance in the instance group. Detailed below.

Id string

The ID of the EMR Cluster

InstanceCount int

Target number of instances for the instance group. Must be 1 or 3. Defaults to 1. Launching with multiple master nodes is only supported in EMR version 5.23.0+, and requires this resource’s core_instance_group to be configured. Public (Internet accessible) instances must be created in VPC subnets that have map public IP on launch enabled. Termination protection is automatically enabled when launched with multiple master nodes and this provider must have the termination_protection = false configuration applied before destroying this resource.

Name string

Friendly name given to the instance group.

instanceType string

EC2 instance type for all instances in the instance group.

bidPrice string

Bid price for each EC2 instance in the instance group, expressed in USD. By setting this attribute, the instance group is being declared as a Spot Instance, and will implicitly create a Spot request. Leave this blank to use On-Demand Instances.

ebsConfigs ClusterMasterInstanceGroupEbsConfig[]

Configuration block(s) for EBS volumes attached to each instance in the instance group. Detailed below.

id string

The ID of the EMR Cluster

instanceCount number

Target number of instances for the instance group. Must be 1 or 3. Defaults to 1. Launching with multiple master nodes is only supported in EMR version 5.23.0+, and requires this resource’s core_instance_group to be configured. Public (Internet accessible) instances must be created in VPC subnets that have map public IP on launch enabled. Termination protection is automatically enabled when launched with multiple master nodes and this provider must have the termination_protection = false configuration applied before destroying this resource.

name string

Friendly name given to the instance group.

instance_type str

EC2 instance type for all instances in the instance group.

bid_price str

Bid price for each EC2 instance in the instance group, expressed in USD. By setting this attribute, the instance group is being declared as a Spot Instance, and will implicitly create a Spot request. Leave this blank to use On-Demand Instances.

ebs_configs List[ClusterMasterInstanceGroupEbsConfig]

Configuration block(s) for EBS volumes attached to each instance in the instance group. Detailed below.

id str

The ID of the EMR Cluster

instance_count float

Target number of instances for the instance group. Must be 1 or 3. Defaults to 1. Launching with multiple master nodes is only supported in EMR version 5.23.0+, and requires this resource’s core_instance_group to be configured. Public (Internet accessible) instances must be created in VPC subnets that have map public IP on launch enabled. Termination protection is automatically enabled when launched with multiple master nodes and this provider must have the termination_protection = false configuration applied before destroying this resource.

name str

Friendly name given to the instance group.

ClusterMasterInstanceGroupEbsConfig

See the input and output API doc for this type.

See the input and output API doc for this type.

See the input and output API doc for this type.

Size int

The volume size, in gibibytes (GiB).

Type string

The volume type. Valid options are gp2, io1, standard and st1. See EBS Volume Types.

Iops int

The number of I/O operations per second (IOPS) that the volume supports

VolumesPerInstance int

The number of EBS volumes with this configuration to attach to each EC2 instance in the instance group (default is 1)

Size int

The volume size, in gibibytes (GiB).

Type string

The volume type. Valid options are gp2, io1, standard and st1. See EBS Volume Types.

Iops int

The number of I/O operations per second (IOPS) that the volume supports

VolumesPerInstance int

The number of EBS volumes with this configuration to attach to each EC2 instance in the instance group (default is 1)

size number

The volume size, in gibibytes (GiB).

type string

The volume type. Valid options are gp2, io1, standard and st1. See EBS Volume Types.

iops number

The number of I/O operations per second (IOPS) that the volume supports

volumesPerInstance number

The number of EBS volumes with this configuration to attach to each EC2 instance in the instance group (default is 1)

size float

The volume size, in gibibytes (GiB).

type str

The volume type. Valid options are gp2, io1, standard and st1. See EBS Volume Types.

iops float

The number of I/O operations per second (IOPS) that the volume supports

volumesPerInstance float

The number of EBS volumes with this configuration to attach to each EC2 instance in the instance group (default is 1)

ClusterStep

See the input and output API doc for this type.

See the input and output API doc for this type.

See the input and output API doc for this type.

ActionOnFailure string

The action to take if the step fails. Valid values: TERMINATE_JOB_FLOW, TERMINATE_CLUSTER, CANCEL_AND_WAIT, and CONTINUE

HadoopJarStep ClusterStepHadoopJarStepArgs

The JAR file used for the step. Defined below.

Name string

The name of the step.

ActionOnFailure string

The action to take if the step fails. Valid values: TERMINATE_JOB_FLOW, TERMINATE_CLUSTER, CANCEL_AND_WAIT, and CONTINUE

HadoopJarStep ClusterStepHadoopJarStep

The JAR file used for the step. Defined below.

Name string

The name of the step.

actionOnFailure string

The action to take if the step fails. Valid values: TERMINATE_JOB_FLOW, TERMINATE_CLUSTER, CANCEL_AND_WAIT, and CONTINUE

hadoopJarStep ClusterStepHadoopJarStep

The JAR file used for the step. Defined below.

name string

The name of the step.

actionOnFailure str

The action to take if the step fails. Valid values: TERMINATE_JOB_FLOW, TERMINATE_CLUSTER, CANCEL_AND_WAIT, and CONTINUE

hadoopJarStep Dict[ClusterStepHadoopJarStep]

The JAR file used for the step. Defined below.

name str

The name of the step.

ClusterStepHadoopJarStep

See the input and output API doc for this type.

See the input and output API doc for this type.

See the input and output API doc for this type.

Jar string

Path to a JAR file run during the step.

Args List<string>

List of command line arguments passed to the JAR file’s main function when executed.

MainClass string

Name of the main class in the specified Java file. If not specified, the JAR file should specify a Main-Class in its manifest file.

Properties Dictionary<string, string>

Key-Value map of Java properties that are set when the step runs. You can use these properties to pass key value pairs to your main function.

Jar string

Path to a JAR file run during the step.

Args []string

List of command line arguments passed to the JAR file’s main function when executed.

MainClass string

Name of the main class in the specified Java file. If not specified, the JAR file should specify a Main-Class in its manifest file.

Properties map[string]string

Key-Value map of Java properties that are set when the step runs. You can use these properties to pass key value pairs to your main function.

jar string

Path to a JAR file run during the step.

args string[]

List of command line arguments passed to the JAR file’s main function when executed.

mainClass string

Name of the main class in the specified Java file. If not specified, the JAR file should specify a Main-Class in its manifest file.

properties {[key: string]: string}

Key-Value map of Java properties that are set when the step runs. You can use these properties to pass key value pairs to your main function.

jar str

Path to a JAR file run during the step.

args List[str]

List of command line arguments passed to the JAR file’s main function when executed.

mainClass str

Name of the main class in the specified Java file. If not specified, the JAR file should specify a Main-Class in its manifest file.

properties Dict[str, str]

Key-Value map of Java properties that are set when the step runs. You can use these properties to pass key value pairs to your main function.

Package Details

Repository
https://github.com/pulumi/pulumi-aws
License
Apache-2.0
Notes
This Pulumi package is based on the aws Terraform Provider.