Class Cluster
Provides an Elastic MapReduce Cluster, a web service that makes it easy to process large amounts of data efficiently. See Amazon Elastic MapReduce Documentation for more information.
To configure Instance Groups for task nodes, see the aws.emr.InstanceGroup resource.
Support for Instance Fleets will be made available in an upcoming release.
Example Usage
using Pulumi;
using Aws = Pulumi.Aws;
/// <summary>
/// Example stack that declares a single EMR cluster running Spark, with a
/// bootstrap action, Hadoop/Spark environment configuration, and an
/// autoscaled core instance group.
/// </summary>
class MyStack : Stack
{
public MyStack()
{
var cluster = new Aws.Emr.Cluster("cluster", new Aws.Emr.ClusterArgs
{
// JSON string selecting additional features; here it supplies proxy host/port settings.
AdditionalInfo = @"{
""instanceAwsClientConfiguration"": {
""proxyPort"": 8099,
""proxyHost"": ""myproxy.example.com""
}
}
",
// Applications to make available on the cluster.
Applications =
{
"Spark",
},
// Ordered bootstrap actions; this one invokes the run-if script with a
// master-node condition and an echo command as its arguments.
BootstrapActions =
{
new Aws.Emr.Inputs.ClusterBootstrapActionArgs
{
Args =
{
"instance.isMaster=true",
"echo running on master node",
},
Name = "runif",
Path = "s3://elasticmapreduce/bootstrap-actions/run-if",
},
},
// JSON list of configurations: sets JAVA_HOME through the "export"
// classification for both hadoop-env and spark-env.
ConfigurationsJson = @" [
{
""Classification"": ""hadoop-env"",
""Configurations"": [
{
""Classification"": ""export"",
""Properties"": {
""JAVA_HOME"": ""/usr/lib/jvm/java-1.8.0""
}
}
],
""Properties"": {}
},
{
""Classification"": ""spark-env"",
""Configurations"": [
{
""Classification"": ""export"",
""Properties"": {
""JAVA_HOME"": ""/usr/lib/jvm/java-1.8.0""
}
}
],
""Properties"": {}
}
]
",
// Core node instance group, including its autoscaling policy and EBS settings.
CoreInstanceGroup = new Aws.Emr.Inputs.ClusterCoreInstanceGroupArgs
{
// Autoscaling policy JSON: capacity is constrained to 1-2, and one rule
// adds 1 unit of capacity when YARNMemoryAvailablePercentage is below 15.
AutoscalingPolicy = @"{
""Constraints"": {
""MinCapacity"": 1,
""MaxCapacity"": 2
},
""Rules"": [
{
""Name"": ""ScaleOutMemoryPercentage"",
""Description"": ""Scale out if YARNMemoryAvailablePercentage is less than 15"",
""Action"": {
""SimpleScalingPolicyConfiguration"": {
""AdjustmentType"": ""CHANGE_IN_CAPACITY"",
""ScalingAdjustment"": 1,
""CoolDown"": 300
}
},
""Trigger"": {
""CloudWatchAlarmDefinition"": {
""ComparisonOperator"": ""LESS_THAN"",
""EvaluationPeriods"": 1,
""MetricName"": ""YARNMemoryAvailablePercentage"",
""Namespace"": ""AWS/ElasticMapReduce"",
""Period"": 300,
""Statistic"": ""AVERAGE"",
""Threshold"": 15.0,
""Unit"": ""PERCENT""
}
}
}
]
}
",
BidPrice = "0.30",
// NOTE(review): this nested key/value initializer mixes string and int values
// and may not match the EBS configuration type expected by
// ClusterCoreInstanceGroupArgs - verify against the SDK before copying.
EbsConfig =
{
{
{ "size", "40" },
{ "type", "gp2" },
{ "volumesPerInstance", 1 },
},
},
InstanceCount = 1,
InstanceType = "c4.large",
},
// Size of the EBS root device volume for each EC2 instance.
EbsRootVolumeSize = 100,
// NOTE(review): aws_security_group, aws_iam_instance_profile and aws_subnet are
// not declared anywhere in this snippet; they presumably stand for resources
// defined elsewhere in the program - TODO confirm and replace with real references.
Ec2Attributes = new Aws.Emr.Inputs.ClusterEc2AttributesArgs
{
EmrManagedMasterSecurityGroup = aws_security_group.Sg.Id,
EmrManagedSlaveSecurityGroup = aws_security_group.Sg.Id,
InstanceProfile = aws_iam_instance_profile.Emr_profile.Arn,
SubnetId = aws_subnet.Main.Id,
},
KeepJobFlowAliveWhenNoSteps = true,
// Master node instance group; only the instance type is set here.
MasterInstanceGroup = new Aws.Emr.Inputs.ClusterMasterInstanceGroupArgs
{
InstanceType = "m4.large",
},
ReleaseLabel = "emr-4.6.0",
// NOTE(review): aws_iam_role is likewise not declared in this snippet.
ServiceRole = aws_iam_role.Iam_emr_service_role.Arn,
Tags =
{
{ "env", "env" },
{ "role", "rolename" },
},
TerminationProtection = false,
});
}
}
Multiple Node Master Instance Group
using Pulumi;
using Aws = Pulumi.Aws;
/// <summary>
/// Example stack that declares an EMR cluster whose master instance group
/// runs three nodes, placed in a subnet that maps public IPs on launch.
/// </summary>
class MyStack : Stack
{
    public MyStack()
    {
        // Map public IP on launch must be enabled for public (Internet accessible) subnets
        var exampleSubnet = new Aws.Ec2.Subnet("exampleSubnet", new Aws.Ec2.SubnetArgs
        {
            MapPublicIpOnLaunch = true,
        });
        var exampleCluster = new Aws.Emr.Cluster("exampleCluster", new Aws.Emr.ClusterArgs
        {
            // The original example left this assignment without a value, which does
            // not compile. A core instance group is supplied explicitly; the instance
            // type shown is illustrative - choose one appropriate for your workload.
            CoreInstanceGroup = new Aws.Emr.Inputs.ClusterCoreInstanceGroupArgs
            {
                InstanceType = "c4.large",
            },
            Ec2Attributes = new Aws.Emr.Inputs.ClusterEc2AttributesArgs
            {
                SubnetId = exampleSubnet.Id,
            },
            // Three master nodes make this a multiple-master cluster.
            MasterInstanceGroup = new Aws.Emr.Inputs.ClusterMasterInstanceGroupArgs
            {
                InstanceCount = 3,
            },
            ReleaseLabel = "emr-5.24.1",
            // To destroy the cluster later, this must first be set to false and applied.
            TerminationProtection = true,
        });
    }
}
Inherited Members
Namespace: Pulumi.Aws.Emr
Assembly: Pulumi.Aws.dll
Syntax
public class Cluster : CustomResource
Constructors
View Source
Cluster(String, ClusterArgs, CustomResourceOptions)
Create a Cluster resource with the given unique name, arguments, and options.
Declaration
public Cluster(string name, ClusterArgs args, CustomResourceOptions options = null)
Parameters
| Type | Name | Description |
|---|---|---|
| System.String | name | The unique name of the resource |
| ClusterArgs | args | The arguments used to populate this resource's properties |
| CustomResourceOptions | options | A bag of options that control this resource's behavior |
Properties
View Source
AdditionalInfo
A JSON string for selecting additional features such as adding proxy information. Note: Currently there is no API to retrieve the value of this argument after EMR cluster creation, so this provider cannot detect drift from the actual EMR cluster if the value is changed outside this provider.
Declaration
public Output<string> AdditionalInfo { get; }
Property Value
| Type | Description |
|---|---|
| Output<System.String> |
Applications
A list of applications for the cluster. Valid values are: Flink, Hadoop, Hive, Mahout, Pig, Spark, and JupyterHub (as of EMR 5.14.0). Case insensitive
Declaration
public Output<ImmutableArray<string>> Applications { get; }
Property Value
| Type | Description |
|---|---|
| Output<System.Collections.Immutable.ImmutableArray<System.String>> |
Arn
Declaration
public Output<string> Arn { get; }
Property Value
| Type | Description |
|---|---|
| Output<System.String> |
AutoscalingRole
An IAM role for automatic scaling policies. The IAM role provides permissions that the automatic scaling feature requires to launch and terminate EC2 instances in an instance group.
Declaration
public Output<string> AutoscalingRole { get; }
Property Value
| Type | Description |
|---|---|
| Output<System.String> |
BootstrapActions
Ordered list of bootstrap actions that will be run before Hadoop is started on the cluster nodes. Defined below.
Declaration
public Output<ImmutableArray<ClusterBootstrapAction>> BootstrapActions { get; }
Property Value
| Type | Description |
|---|---|
| Output<System.Collections.Immutable.ImmutableArray<ClusterBootstrapAction>> |
Configurations
List of configurations supplied for the EMR cluster you are creating
Declaration
public Output<string> Configurations { get; }
Property Value
| Type | Description |
|---|---|
| Output<System.String> |
ConfigurationsJson
A JSON string for supplying list of configurations for the EMR cluster.
Declaration
public Output<string> ConfigurationsJson { get; }
Property Value
| Type | Description |
|---|---|
| Output<System.String> |
CoreInstanceCount
Use the core_instance_group configuration block instance_count argument instead. Number of Amazon EC2 instances used to execute the job flow. EMR will use one node as the cluster's master node and use the remainder of the nodes (core_instance_count-1) as core nodes. Cannot be specified if core_instance_group or instance_group configuration blocks are set. Default 1
Declaration
public Output<int> CoreInstanceCount { get; }
Property Value
| Type | Description |
|---|---|
| Output<System.Int32> |
CoreInstanceGroup
Configuration block to use an Instance Group for the core node type. Cannot be specified if core_instance_count argument, core_instance_type argument, or instance_group configuration blocks are set. Detailed below.
Declaration
public Output<ClusterCoreInstanceGroup> CoreInstanceGroup { get; }
Property Value
| Type | Description |
|---|---|
| Output<ClusterCoreInstanceGroup> |
CoreInstanceType
Use the core_instance_group configuration block instance_type argument instead. The EC2 instance type of the slave nodes. Cannot be specified if core_instance_group or instance_group configuration blocks are set.
Declaration
public Output<string> CoreInstanceType { get; }
Property Value
| Type | Description |
|---|---|
| Output<System.String> |
CustomAmiId
A custom Amazon Linux AMI for the cluster (instead of an EMR-owned AMI). Available in Amazon EMR version 5.7.0 and later.
Declaration
public Output<string> CustomAmiId { get; }
Property Value
| Type | Description |
|---|---|
| Output<System.String> |
EbsRootVolumeSize
Size in GiB of the EBS root device volume of the Linux AMI that is used for each EC2 instance. Available in Amazon EMR version 4.x and later.
Declaration
public Output<int?> EbsRootVolumeSize { get; }
Property Value
| Type | Description |
|---|---|
| Output<System.Nullable<System.Int32>> |
Ec2Attributes
Attributes for the EC2 instances running the job flow. Defined below
Declaration
public Output<ClusterEc2Attributes> Ec2Attributes { get; }
Property Value
| Type | Description |
|---|---|
| Output<ClusterEc2Attributes> |
InstanceGroups
Use the master_instance_group configuration block, core_instance_group configuration block and aws.emr.InstanceGroup resource(s) instead. A list of instance_group objects for each instance group in the cluster. Exactly one of master_instance_type and instance_group must be specified. If instance_group is set, then it must contain a configuration block for at least the MASTER instance group type (as well as any additional instance groups). Cannot be specified if master_instance_group or core_instance_group configuration blocks are set. Defined below
Declaration
public Output<ImmutableArray<ClusterInstanceGroup>> InstanceGroups { get; }
Property Value
| Type | Description |
|---|---|
| Output<System.Collections.Immutable.ImmutableArray<ClusterInstanceGroup>> |
KeepJobFlowAliveWhenNoSteps
Whether to keep the cluster running when it has no steps or when all steps are complete (default is on).
Declaration
public Output<bool> KeepJobFlowAliveWhenNoSteps { get; }
Property Value
| Type | Description |
|---|---|
| Output<System.Boolean> |
KerberosAttributes
Kerberos configuration for the cluster. Defined below
Declaration
public Output<ClusterKerberosAttributes> KerberosAttributes { get; }
Property Value
| Type | Description |
|---|---|
| Output<ClusterKerberosAttributes> |
LogUri
S3 bucket to write the log files of the job flow. If a value is not provided, logs are not created
Declaration
public Output<string> LogUri { get; }
Property Value
| Type | Description |
|---|---|
| Output<System.String> |
MasterInstanceGroup
Configuration block to use an Instance Group for the master node type. Cannot be specified if master_instance_type argument or instance_group configuration blocks are set. Detailed below.
Declaration
public Output<ClusterMasterInstanceGroup> MasterInstanceGroup { get; }
Property Value
| Type | Description |
|---|---|
| Output<ClusterMasterInstanceGroup> |
MasterInstanceType
Use the master_instance_group configuration block instance_type argument instead. The EC2 instance type of the master node. Cannot be specified if master_instance_group or instance_group configuration blocks are set.
Declaration
public Output<string> MasterInstanceType { get; }
Property Value
| Type | Description |
|---|---|
| Output<System.String> |
MasterPublicDns
The public DNS name of the master EC2 instance.
Note: core_instance_group.0.id is the Core node type Instance Group ID, if using an Instance Group for this node type.
Declaration
public Output<string> MasterPublicDns { get; }
Property Value
| Type | Description |
|---|---|
| Output<System.String> |
Name
The name of the job flow.
Declaration
public Output<string> Name { get; }
Property Value
| Type | Description |
|---|---|
| Output<System.String> |
ReleaseLabel
The release label for the Amazon EMR release
Declaration
public Output<string> ReleaseLabel { get; }
Property Value
| Type | Description |
|---|---|
| Output<System.String> |
ScaleDownBehavior
The way that individual Amazon EC2 instances terminate when an automatic scale-in activity occurs or an instance group is resized.
Declaration
public Output<string> ScaleDownBehavior { get; }
Property Value
| Type | Description |
|---|---|
| Output<System.String> |
SecurityConfiguration
The security configuration name to attach to the EMR cluster. Only valid for EMR clusters with release_label 4.8.0 or greater
Declaration
public Output<string> SecurityConfiguration { get; }
Property Value
| Type | Description |
|---|---|
| Output<System.String> |
ServiceRole
IAM role that will be assumed by the Amazon EMR service to access AWS resources
Declaration
public Output<string> ServiceRole { get; }
Property Value
| Type | Description |
|---|---|
| Output<System.String> |
State
Declaration
public Output<string> State { get; }
Property Value
| Type | Description |
|---|---|
| Output<System.String> |
StepConcurrencyLevel
The number of steps that can be executed concurrently. You can specify a maximum of 256 steps. Only valid for EMR clusters with release_label 5.28.0 or greater. (default is 1)
Declaration
public Output<int?> StepConcurrencyLevel { get; }
Property Value
| Type | Description |
|---|---|
| Output<System.Nullable<System.Int32>> |
Steps
List of steps to run when creating the cluster. Defined below. It is highly recommended to utilize ignoreChanges if other steps are being managed outside of this provider.
Declaration
public Output<ImmutableArray<ClusterStep>> Steps { get; }
Property Value
| Type | Description |
|---|---|
| Output<System.Collections.Immutable.ImmutableArray<ClusterStep>> |
Tags
List of tags to apply to the EMR cluster.
Declaration
public Output<ImmutableDictionary<string, object>> Tags { get; }
Property Value
| Type | Description |
|---|---|
| Output<System.Collections.Immutable.ImmutableDictionary<System.String, System.Object>> |
TerminationProtection
Switch on/off termination protection (default is false, except when using multiple master nodes). Before attempting to destroy the resource when termination protection is enabled, this configuration must be applied with its value set to false.
Declaration
public Output<bool> TerminationProtection { get; }
Property Value
| Type | Description |
|---|---|
| Output<System.Boolean> |
VisibleToAllUsers
Whether the job flow is visible to all IAM users of the AWS account associated with the job flow. Default true
Declaration
public Output<bool?> VisibleToAllUsers { get; }
Property Value
| Type | Description |
|---|---|
| Output<System.Nullable<System.Boolean>> |
Methods
View Source
Get(String, Input<String>, ClusterState, CustomResourceOptions)
Get an existing Cluster resource's state with the given name, ID, and optional extra properties used to qualify the lookup.
Declaration
public static Cluster Get(string name, Input<string> id, ClusterState state = null, CustomResourceOptions options = null)
Parameters
| Type | Name | Description |
|---|---|---|
| System.String | name | The unique name of the resulting resource. |
| Input<System.String> | id | The unique provider ID of the resource to lookup. |
| ClusterState | state | Any extra arguments used during the lookup. |
| CustomResourceOptions | options | A bag of options that control this resource's behavior |
Returns
| Type | Description |
|---|---|
| Cluster |