Show / Hide Table of Contents

Class Cluster

Provides an Elastic MapReduce Cluster, a web service that makes it easy to process large amounts of data efficiently. See Amazon Elastic MapReduce Documentation for more information.

To configure Instance Groups for task nodes, see the aws.emr.InstanceGroup resource.

Support for Instance Fleets will be made available in an upcoming release.

Example Usage

using Pulumi;
using Aws = Pulumi.Aws;

/// <summary>
/// Example stack that declares a single EMR cluster running Spark, with one bootstrap action,
/// JSON-supplied Hadoop/Spark environment configurations, and a core instance group that
/// carries an autoscaling policy and one EBS volume configuration.
/// </summary>
class MyStack : Stack
{
    /// <summary>
    /// Declares the example cluster resource.
    /// </summary>
    public MyStack()
    {
        // NOTE(review): aws_security_group.Sg, aws_iam_instance_profile.Emr_profile,
        // aws_subnet.Main and aws_iam_role.Iam_emr_service_role are not declared in this
        // snippet; presumably they stand for resources defined elsewhere in the program —
        // confirm and substitute real resource variables before use.
        var cluster = new Aws.Emr.Cluster("cluster", new Aws.Emr.ClusterArgs
        {
            // Extra features passed as a JSON string (here: proxy host/port).
            AdditionalInfo = @"{
""instanceAwsClientConfiguration"": {
""proxyPort"": 8099,
""proxyHost"": ""myproxy.example.com""
}
}

",
            Applications = 
            {
                "Spark",
            },
            BootstrapActions = 
            {
                new Aws.Emr.Inputs.ClusterBootstrapActionArgs
                {
                    Args = 
                    {
                        "instance.isMaster=true",
                        "echo running on master node",
                    },
                    Name = "runif",
                    Path = "s3://elasticmapreduce/bootstrap-actions/run-if",
                },
            },
            // Cluster configurations supplied as a JSON list; both entries export JAVA_HOME.
            ConfigurationsJson = @"  [
{
  ""Classification"": ""hadoop-env"",
  ""Configurations"": [
    {
      ""Classification"": ""export"",
      ""Properties"": {
        ""JAVA_HOME"": ""/usr/lib/jvm/java-1.8.0""
      }
    }
  ],
  ""Properties"": {}
},
{
  ""Classification"": ""spark-env"",
  ""Configurations"": [
    {
      ""Classification"": ""export"",
      ""Properties"": {
        ""JAVA_HOME"": ""/usr/lib/jvm/java-1.8.0""
      }
    }
  ],
  ""Properties"": {}
}
]

",
            CoreInstanceGroup = new Aws.Emr.Inputs.ClusterCoreInstanceGroupArgs
            {
                // Autoscaling policy as JSON: capacity 1..2, with one scale-out rule keyed on
                // YARNMemoryAvailablePercentage.
                AutoscalingPolicy = @"{
""Constraints"": {
""MinCapacity"": 1,
""MaxCapacity"": 2
},
""Rules"": [
{
""Name"": ""ScaleOutMemoryPercentage"",
""Description"": ""Scale out if YARNMemoryAvailablePercentage is less than 15"",
""Action"": {
  ""SimpleScalingPolicyConfiguration"": {
    ""AdjustmentType"": ""CHANGE_IN_CAPACITY"",
    ""ScalingAdjustment"": 1,
    ""CoolDown"": 300
  }
},
""Trigger"": {
  ""CloudWatchAlarmDefinition"": {
    ""ComparisonOperator"": ""LESS_THAN"",
    ""EvaluationPeriods"": 1,
    ""MetricName"": ""YARNMemoryAvailablePercentage"",
    ""Namespace"": ""AWS/ElasticMapReduce"",
    ""Period"": 300,
    ""Statistic"": ""AVERAGE"",
    ""Threshold"": 15.0,
    ""Unit"": ""PERCENT""
  }
}
}
]
}

",
                BidPrice = "0.30",
                // One EBS volume configuration, expressed as a typed args object rather than
                // an untyped brace list (which is not valid C#).
                EbsConfigs = 
                {
                    new Aws.Emr.Inputs.ClusterCoreInstanceGroupEbsConfigArgs
                    {
                        Size = 40,
                        Type = "gp2",
                        VolumesPerInstance = 1,
                    },
                },
                InstanceCount = 1,
                InstanceType = "c4.large",
            },
            EbsRootVolumeSize = 100,
            Ec2Attributes = new Aws.Emr.Inputs.ClusterEc2AttributesArgs
            {
                EmrManagedMasterSecurityGroup = aws_security_group.Sg.Id,
                EmrManagedSlaveSecurityGroup = aws_security_group.Sg.Id,
                InstanceProfile = aws_iam_instance_profile.Emr_profile.Arn,
                SubnetId = aws_subnet.Main.Id,
            },
            KeepJobFlowAliveWhenNoSteps = true,
            MasterInstanceGroup = new Aws.Emr.Inputs.ClusterMasterInstanceGroupArgs
            {
                InstanceType = "m4.large",
            },
            ReleaseLabel = "emr-4.6.0",
            ServiceRole = aws_iam_role.Iam_emr_service_role.Arn,
            Tags = 
            {
                { "env", "env" },
                { "role", "rolename" },
            },
            TerminationProtection = false,
        });
    }

}

Multiple Node Master Instance Group

using Pulumi;
using Aws = Pulumi.Aws;

/// <summary>
/// Example stack that declares a subnet with public IP mapping enabled and an EMR cluster
/// whose master instance group has three instances, with termination protection switched on.
/// </summary>
class MyStack : Stack
{
    /// <summary>
    /// Declares the example subnet and the multi-master cluster placed in it.
    /// </summary>
    public MyStack()
    {
        // Map public IP on launch must be enabled for public (Internet accessible) subnets
        var exampleSubnet = new Aws.Ec2.Subnet("exampleSubnet", new Aws.Ec2.SubnetArgs
        {
            MapPublicIpOnLaunch = true,
        });
        var exampleCluster = new Aws.Emr.Cluster("exampleCluster", new Aws.Emr.ClusterArgs
        {
            // A core instance group block is supplied but left unpopulated in this
            // abbreviated example; set its properties (such as the instance type) for real use.
            CoreInstanceGroup = new Aws.Emr.Inputs.ClusterCoreInstanceGroupArgs
            {
            },
            Ec2Attributes = new Aws.Emr.Inputs.ClusterEc2AttributesArgs
            {
                SubnetId = exampleSubnet.Id,
            },
            MasterInstanceGroup = new Aws.Emr.Inputs.ClusterMasterInstanceGroupArgs
            {
                // Three master instances make this a multiple-master cluster.
                InstanceCount = 3,
            },
            ReleaseLabel = "emr-5.24.1",
            TerminationProtection = true,
        });
    }

}
Inheritance
System.Object
Resource
CustomResource
Cluster
Inherited Members
CustomResource.Id
Resource.GetResourceType()
Resource.GetResourceName()
Resource.Urn
System.Object.Equals(System.Object)
System.Object.Equals(System.Object, System.Object)
System.Object.GetHashCode()
System.Object.GetType()
System.Object.MemberwiseClone()
System.Object.ReferenceEquals(System.Object, System.Object)
System.Object.ToString()
Namespace: Pulumi.Aws.Emr
Assembly: Pulumi.Aws.dll
Syntax
public class Cluster : CustomResource

Constructors

View Source

Cluster(String, ClusterArgs, CustomResourceOptions)

Create a Cluster resource with the given unique name, arguments, and options.

Declaration
public Cluster(string name, ClusterArgs args, CustomResourceOptions options = null)
Parameters
Type Name Description
System.String name

The unique name of the resource

ClusterArgs args

The arguments used to populate this resource's properties

CustomResourceOptions options

A bag of options that control this resource's behavior

Properties

View Source

AdditionalInfo

A JSON string for selecting additional features such as adding proxy information. Note: Currently there is no API to retrieve the value of this argument after EMR cluster creation from provider, therefore this provider cannot detect drift from the actual EMR cluster if its value is changed outside this provider.

Declaration
public Output<string> AdditionalInfo { get; }
Property Value
Type Description
Output<System.String>
View Source

Applications

A list of applications for the cluster. Valid values are: Flink, Hadoop, Hive, Mahout, Pig, Spark, and JupyterHub (as of EMR 5.14.0). Case insensitive

Declaration
public Output<ImmutableArray<string>> Applications { get; }
Property Value
Type Description
Output<System.Collections.Immutable.ImmutableArray<System.String>>
View Source

Arn

Declaration
public Output<string> Arn { get; }
Property Value
Type Description
Output<System.String>
View Source

AutoscalingRole

An IAM role for automatic scaling policies. The IAM role provides permissions that the automatic scaling feature requires to launch and terminate EC2 instances in an instance group.

Declaration
public Output<string> AutoscalingRole { get; }
Property Value
Type Description
Output<System.String>
View Source

BootstrapActions

Ordered list of bootstrap actions that will be run before Hadoop is started on the cluster nodes. Defined below.

Declaration
public Output<ImmutableArray<ClusterBootstrapAction>> BootstrapActions { get; }
Property Value
Type Description
Output<System.Collections.Immutable.ImmutableArray<ClusterBootstrapAction>>
View Source

Configurations

List of configurations supplied for the EMR cluster you are creating

Declaration
public Output<string> Configurations { get; }
Property Value
Type Description
Output<System.String>
View Source

ConfigurationsJson

A JSON string for supplying a list of configurations for the EMR cluster.

Declaration
public Output<string> ConfigurationsJson { get; }
Property Value
Type Description
Output<System.String>
View Source

CoreInstanceCount

Use the core_instance_group configuration block instance_count argument instead. Number of Amazon EC2 instances used to execute the job flow. EMR will use one node as the cluster's master node and use the remainder of the nodes (core_instance_count-1) as core nodes. Cannot be specified if core_instance_group or instance_group configuration blocks are set. Default 1

Declaration
public Output<int> CoreInstanceCount { get; }
Property Value
Type Description
Output<System.Int32>
View Source

CoreInstanceGroup

Configuration block to use an Instance Group for the core node type. Cannot be specified if core_instance_count argument, core_instance_type argument, or instance_group configuration blocks are set. Detailed below.

Declaration
public Output<ClusterCoreInstanceGroup> CoreInstanceGroup { get; }
Property Value
Type Description
Output<ClusterCoreInstanceGroup>
View Source

CoreInstanceType

Use the core_instance_group configuration block instance_type argument instead. The EC2 instance type of the slave nodes. Cannot be specified if core_instance_group or instance_group configuration blocks are set.

Declaration
public Output<string> CoreInstanceType { get; }
Property Value
Type Description
Output<System.String>
View Source

CustomAmiId

A custom Amazon Linux AMI for the cluster (instead of an EMR-owned AMI). Available in Amazon EMR version 5.7.0 and later.

Declaration
public Output<string> CustomAmiId { get; }
Property Value
Type Description
Output<System.String>
View Source

EbsRootVolumeSize

Size in GiB of the EBS root device volume of the Linux AMI that is used for each EC2 instance. Available in Amazon EMR version 4.x and later.

Declaration
public Output<int?> EbsRootVolumeSize { get; }
Property Value
Type Description
Output<System.Nullable<System.Int32>>
View Source

Ec2Attributes

Attributes for the EC2 instances running the job flow. Defined below

Declaration
public Output<ClusterEc2Attributes> Ec2Attributes { get; }
Property Value
Type Description
Output<ClusterEc2Attributes>
View Source

InstanceGroups

Use the master_instance_group configuration block, core_instance_group configuration block and aws.emr.InstanceGroup resource(s) instead. A list of instance_group objects for each instance group in the cluster. Exactly one of master_instance_type and instance_group must be specified. If instance_group is set, then it must contain a configuration block for at least the MASTER instance group type (as well as any additional instance groups). Cannot be specified if master_instance_group or core_instance_group configuration blocks are set. Defined below

Declaration
public Output<ImmutableArray<ClusterInstanceGroup>> InstanceGroups { get; }
Property Value
Type Description
Output<System.Collections.Immutable.ImmutableArray<ClusterInstanceGroup>>
View Source

KeepJobFlowAliveWhenNoSteps

Switch on/off whether the cluster keeps running when it has no steps or when all steps are complete (default is on).

Declaration
public Output<bool> KeepJobFlowAliveWhenNoSteps { get; }
Property Value
Type Description
Output<System.Boolean>
View Source

KerberosAttributes

Kerberos configuration for the cluster. Defined below

Declaration
public Output<ClusterKerberosAttributes> KerberosAttributes { get; }
Property Value
Type Description
Output<ClusterKerberosAttributes>
View Source

LogUri

S3 bucket to write the log files of the job flow. If a value is not provided, logs are not created

Declaration
public Output<string> LogUri { get; }
Property Value
Type Description
Output<System.String>
View Source

MasterInstanceGroup

Configuration block to use an Instance Group for the master node type. Cannot be specified if master_instance_type argument or instance_group configuration blocks are set. Detailed below.

Declaration
public Output<ClusterMasterInstanceGroup> MasterInstanceGroup { get; }
Property Value
Type Description
Output<ClusterMasterInstanceGroup>
View Source

MasterInstanceType

Use the master_instance_group configuration block instance_type argument instead. The EC2 instance type of the master node. Cannot be specified if master_instance_group or instance_group configuration blocks are set.

Declaration
public Output<string> MasterInstanceType { get; }
Property Value
Type Description
Output<System.String>
View Source

MasterPublicDns

The public DNS name of the master EC2 instance.

  • core_instance_group.0.id - Core node type Instance Group ID, if using Instance Group for this node type.
Declaration
public Output<string> MasterPublicDns { get; }
Property Value
Type Description
Output<System.String>
View Source

Name

The name of the job flow (the EMR cluster).

Declaration
public Output<string> Name { get; }
Property Value
Type Description
Output<System.String>
View Source

ReleaseLabel

The release label for the Amazon EMR release

Declaration
public Output<string> ReleaseLabel { get; }
Property Value
Type Description
Output<System.String>
View Source

ScaleDownBehavior

The way that individual Amazon EC2 instances terminate when an automatic scale-in activity occurs or an instance group is resized.

Declaration
public Output<string> ScaleDownBehavior { get; }
Property Value
Type Description
Output<System.String>
View Source

SecurityConfiguration

The security configuration name to attach to the EMR cluster. Only valid for EMR clusters with release_label 4.8.0 or greater

Declaration
public Output<string> SecurityConfiguration { get; }
Property Value
Type Description
Output<System.String>
View Source

ServiceRole

IAM role that will be assumed by the Amazon EMR service to access AWS resources

Declaration
public Output<string> ServiceRole { get; }
Property Value
Type Description
Output<System.String>
View Source

State

Declaration
public Output<string> State { get; }
Property Value
Type Description
Output<System.String>
View Source

StepConcurrencyLevel

The number of steps that can be executed concurrently. You can specify a maximum of 256 steps. Only valid for EMR clusters with release_label 5.28.0 or greater. (default is 1)

Declaration
public Output<int?> StepConcurrencyLevel { get; }
Property Value
Type Description
Output<System.Nullable<System.Int32>>
View Source

Steps

List of steps to run when creating the cluster. Defined below. It is highly recommended to utilize ignoreChanges if other steps are being managed outside of this provider.

Declaration
public Output<ImmutableArray<ClusterStep>> Steps { get; }
Property Value
Type Description
Output<System.Collections.Immutable.ImmutableArray<ClusterStep>>
View Source

Tags

Map of tags to apply to the EMR cluster.

Declaration
public Output<ImmutableDictionary<string, object>> Tags { get; }
Property Value
Type Description
Output<System.Collections.Immutable.ImmutableDictionary<System.String, System.Object>>
View Source

TerminationProtection

Switch on/off termination protection (default is false, except when using multiple master nodes). Before attempting to destroy the resource when termination protection is enabled, this configuration must be applied with its value set to false.

Declaration
public Output<bool> TerminationProtection { get; }
Property Value
Type Description
Output<System.Boolean>
View Source

VisibleToAllUsers

Whether the job flow is visible to all IAM users of the AWS account associated with the job flow. Default true

Declaration
public Output<bool?> VisibleToAllUsers { get; }
Property Value
Type Description
Output<System.Nullable<System.Boolean>>

Methods

View Source

Get(String, Input<String>, ClusterState, CustomResourceOptions)

Get an existing Cluster resource's state with the given name, ID, and optional extra properties used to qualify the lookup.

Declaration
public static Cluster Get(string name, Input<string> id, ClusterState state = null, CustomResourceOptions options = null)
Parameters
Type Name Description
System.String name

The unique name of the resulting resource.

Input<System.String> id

The unique provider ID of the resource to lookup.

ClusterState state

Any extra arguments used during the lookup.

CustomResourceOptions options

A bag of options that control this resource's behavior

Returns
Type Description
Cluster
  • View Source
Back to top Copyright 2016-2020, Pulumi Corporation.