Crawler
Manages a Glue Crawler. More information can be found in the AWS Glue Developer Guide.
Example Usage
DynamoDB Target
using Pulumi;
using Aws = Pulumi.Aws;
class MyStack : Stack
{
public MyStack()
{
var example = new Aws.Glue.Crawler("example", new Aws.Glue.CrawlerArgs
{
DatabaseName = aws_glue_catalog_database.Example.Name,
DynamodbTargets =
{
new Aws.Glue.Inputs.CrawlerDynamodbTargetArgs
{
Path = "table-name",
},
},
Role = aws_iam_role.Example.Arn,
});
}
}
package main
import (
"github.com/pulumi/pulumi-aws/sdk/v2/go/aws/glue"
"github.com/pulumi/pulumi/sdk/v2/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := glue.NewCrawler(ctx, "example", &glue.CrawlerArgs{
DatabaseName: pulumi.String(aws_glue_catalog_database.Example.Name),
DynamodbTargets: glue.CrawlerDynamodbTargetArray{
&glue.CrawlerDynamodbTargetArgs{
Path: pulumi.String("table-name"),
},
},
Role: pulumi.String(aws_iam_role.Example.Arn),
})
if err != nil {
return err
}
return nil
})
}

import pulumi
import pulumi_aws as aws
example = aws.glue.Crawler("example",
database_name=aws_glue_catalog_database["example"]["name"],
dynamodb_targets=[{
"path": "table-name",
}],
role=aws_iam_role["example"]["arn"])

import * as pulumi from "@pulumi/pulumi";
import * as aws from "@pulumi/aws";
const example = new aws.glue.Crawler("example", {
databaseName: aws_glue_catalog_database_example.name,
dynamodbTargets: [{
path: "table-name",
}],
role: aws_iam_role_example.arn,
});

JDBC Target
using Pulumi;
using Aws = Pulumi.Aws;
class MyStack : Stack
{
public MyStack()
{
var example = new Aws.Glue.Crawler("example", new Aws.Glue.CrawlerArgs
{
DatabaseName = aws_glue_catalog_database.Example.Name,
JdbcTargets =
{
new Aws.Glue.Inputs.CrawlerJdbcTargetArgs
{
ConnectionName = aws_glue_connection.Example.Name,
Path = "database-name/%",
},
},
Role = aws_iam_role.Example.Arn,
});
}
}
package main
import (
"fmt"
"github.com/pulumi/pulumi-aws/sdk/v2/go/aws/glue"
"github.com/pulumi/pulumi/sdk/v2/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := glue.NewCrawler(ctx, "example", &glue.CrawlerArgs{
DatabaseName: pulumi.String(aws_glue_catalog_database.Example.Name),
JdbcTargets: glue.CrawlerJdbcTargetArray{
&glue.CrawlerJdbcTargetArgs{
ConnectionName: pulumi.String(aws_glue_connection.Example.Name),
Path: pulumi.String("database-name/%"),
},
},
Role: pulumi.String(aws_iam_role.Example.Arn),
})
if err != nil {
return err
}
return nil
})
}

import pulumi
import pulumi_aws as aws
example = aws.glue.Crawler("example",
database_name=aws_glue_catalog_database["example"]["name"],
jdbc_targets=[{
"connectionName": aws_glue_connection["example"]["name"],
"path": "database-name/%",
}],
role=aws_iam_role["example"]["arn"])

import * as pulumi from "@pulumi/pulumi";
import * as aws from "@pulumi/aws";
const example = new aws.glue.Crawler("example", {
databaseName: aws_glue_catalog_database_example.name,
jdbcTargets: [{
connectionName: aws_glue_connection_example.name,
path: "database-name/%",
}],
role: aws_iam_role_example.arn,
});

S3 Target
using Pulumi;
using Aws = Pulumi.Aws;
class MyStack : Stack
{
public MyStack()
{
var example = new Aws.Glue.Crawler("example", new Aws.Glue.CrawlerArgs
{
DatabaseName = aws_glue_catalog_database.Example.Name,
Role = aws_iam_role.Example.Arn,
S3Targets =
{
new Aws.Glue.Inputs.CrawlerS3TargetArgs
{
Path = $"s3://{aws_s3_bucket.Example.Bucket}",
},
},
});
}
}
package main
import (
"fmt"
"github.com/pulumi/pulumi-aws/sdk/v2/go/aws/glue"
"github.com/pulumi/pulumi/sdk/v2/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := glue.NewCrawler(ctx, "example", &glue.CrawlerArgs{
DatabaseName: pulumi.String(aws_glue_catalog_database.Example.Name),
Role: pulumi.String(aws_iam_role.Example.Arn),
S3Targets: glue.CrawlerS3TargetArray{
&glue.CrawlerS3TargetArgs{
Path: pulumi.String(fmt.Sprintf("%v%v", "s3://", aws_s3_bucket.Example.Bucket)),
},
},
})
if err != nil {
return err
}
return nil
})
}

import pulumi
import pulumi_aws as aws
example = aws.glue.Crawler("example",
database_name=aws_glue_catalog_database["example"]["name"],
role=aws_iam_role["example"]["arn"],
s3_targets=[{
"path": f"s3://{aws_s3_bucket['example']['bucket']}",
}])

import * as pulumi from "@pulumi/pulumi";
import * as aws from "@pulumi/aws";
const example = new aws.glue.Crawler("example", {
databaseName: aws_glue_catalog_database_example.name,
role: aws_iam_role_example.arn,
s3Targets: [{
path: pulumi.interpolate`s3://${aws_s3_bucket_example.bucket}`,
}],
});

Create a Crawler Resource
new Crawler(name: string, args: CrawlerArgs, opts?: CustomResourceOptions);

def Crawler(resource_name, opts=None, catalog_targets=None, classifiers=None, configuration=None, database_name=None, description=None, dynamodb_targets=None, jdbc_targets=None, name=None, role=None, s3_targets=None, schedule=None, schema_change_policy=None, security_configuration=None, table_prefix=None, tags=None, __props__=None);

func NewCrawler(ctx *Context, name string, args CrawlerArgs, opts ...ResourceOption) (*Crawler, error)

public Crawler(string name, CrawlerArgs args, CustomResourceOptions? opts = null)

- name string
- The unique name of the resource.
- args CrawlerArgs
- The arguments to resource properties.
- opts CustomResourceOptions
- Bag of options to control resource's behavior.
- resource_name str
- The unique name of the resource.
- opts ResourceOptions
- A bag of options that control this resource's behavior.
- ctx Context
- Context object for the current deployment.
- name string
- The unique name of the resource.
- args CrawlerArgs
- The arguments to resource properties.
- opts ResourceOption
- Bag of options to control resource's behavior.
- name string
- The unique name of the resource.
- args CrawlerArgs
- The arguments to resource properties.
- opts CustomResourceOptions
- Bag of options to control resource's behavior.
Crawler Resource Properties
To learn more about resource properties and how to use them, see Inputs and Outputs in the Programming Model docs.
Inputs
The Crawler resource accepts the following input properties:
- DatabaseName string
Glue database where results are written.
- Role string
The IAM role friendly name (including path without leading slash), or ARN of an IAM role, used by the crawler to access other resources.
- CatalogTargets List<CrawlerCatalogTargetArgs>
List of nested Glue catalog target arguments. See below.
- Classifiers List<string>
List of custom classifiers. By default, all AWS classifiers are included in a crawl, but these custom classifiers always override the default classifiers for a given classification.
- Configuration string
JSON string of configuration information.
- Description string
Description of the crawler.
- DynamodbTargets List<CrawlerDynamodbTargetArgs>
List of nested DynamoDB target arguments. See below.
- JdbcTargets List<CrawlerJdbcTargetArgs>
List of nested JDBC target arguments. See below.
- Name string
Name of the crawler.
- S3Targets List<CrawlerS3TargetArgs>
List of nested Amazon S3 target arguments. See below.
- Schedule string
A cron expression used to specify the schedule. For more information, see Time-Based Schedules for Jobs and Crawlers. For example, to run something every day at 12:15 UTC, you would specify: cron(15 12 * * ? *). A sketch that uses this and the other optional inputs follows the property lists below.
- SchemaChangePolicy CrawlerSchemaChangePolicyArgs
Policy for the crawler's update and deletion behavior.
- SecurityConfiguration string
The name of the Security Configuration to be used by the crawler.
- TablePrefix string
The table prefix used for catalog tables that are created.
- Tags Dictionary<string, string>
Key-value map of resource tags.
- DatabaseName string
Glue database where results are written.
- Role string
The IAM role friendly name (including path without leading slash), or ARN of an IAM role, used by the crawler to access other resources.
- CatalogTargets []CrawlerCatalogTarget
List of nested Glue catalog target arguments. See below.
- Classifiers []string
List of custom classifiers. By default, all AWS classifiers are included in a crawl, but these custom classifiers always override the default classifiers for a given classification.
- Configuration string
JSON string of configuration information.
- Description string
Description of the crawler.
- DynamodbTargets []CrawlerDynamodbTarget
List of nested DynamoDB target arguments. See below.
- JdbcTargets []CrawlerJdbcTarget
List of nested JDBC target arguments. See below.
- Name string
Name of the crawler.
- S3Targets []CrawlerS3Target
List of nested Amazon S3 target arguments. See below.
- Schedule string
A cron expression used to specify the schedule. For more information, see Time-Based Schedules for Jobs and Crawlers. For example, to run something every day at 12:15 UTC, you would specify: cron(15 12 * * ? *).
- SchemaChangePolicy CrawlerSchemaChangePolicy
Policy for the crawler's update and deletion behavior.
- SecurityConfiguration string
The name of the Security Configuration to be used by the crawler.
- TablePrefix string
The table prefix used for catalog tables that are created.
- Tags map[string]string
Key-value map of resource tags.
- databaseName string
Glue database where results are written.
- role string
The IAM role friendly name (including path without leading slash), or ARN of an IAM role, used by the crawler to access other resources.
- catalogTargets CrawlerCatalogTarget[]
List of nested Glue catalog target arguments. See below.
- classifiers string[]
List of custom classifiers. By default, all AWS classifiers are included in a crawl, but these custom classifiers always override the default classifiers for a given classification.
- configuration string
JSON string of configuration information.
- description string
Description of the crawler.
- dynamodbTargets CrawlerDynamodbTarget[]
List of nested DynamoDB target arguments. See below.
- jdbcTargets CrawlerJdbcTarget[]
List of nested JDBC target arguments. See below.
- name string
Name of the crawler.
- s3Targets CrawlerS3Target[]
List of nested Amazon S3 target arguments. See below.
- schedule string
A cron expression used to specify the schedule. For more information, see Time-Based Schedules for Jobs and Crawlers. For example, to run something every day at 12:15 UTC, you would specify: cron(15 12 * * ? *).
- schemaChangePolicy CrawlerSchemaChangePolicy
Policy for the crawler's update and deletion behavior.
- securityConfiguration string
The name of the Security Configuration to be used by the crawler.
- tablePrefix string
The table prefix used for catalog tables that are created.
- tags {[key: string]: string}
Key-value map of resource tags.
- database_name str
Glue database where results are written.
- role str
The IAM role friendly name (including path without leading slash), or ARN of an IAM role, used by the crawler to access other resources.
- catalog_targets List[CrawlerCatalogTarget]
List of nested Glue catalog target arguments. See below.
- classifiers List[str]
List of custom classifiers. By default, all AWS classifiers are included in a crawl, but these custom classifiers always override the default classifiers for a given classification.
- configuration str
JSON string of configuration information.
- description str
Description of the crawler.
- dynamodb_targets List[CrawlerDynamodbTarget]
List of nested DynamoDB target arguments. See below.
- jdbc_targets List[CrawlerJdbcTarget]
List of nested JDBC target arguments. See below.
- name str
Name of the crawler.
- s3_targets List[CrawlerS3Target]
List of nested Amazon S3 target arguments. See below.
- schedule str
A cron expression used to specify the schedule. For more information, see Time-Based Schedules for Jobs and Crawlers. For example, to run something every day at 12:15 UTC, you would specify: cron(15 12 * * ? *).
- schema_change_policy Dict[CrawlerSchemaChangePolicy]
Policy for the crawler's update and deletion behavior.
- security_configuration str
The name of the Security Configuration to be used by the crawler.
- table_prefix str
The table prefix used for catalog tables that are created.
- tags Dict[str, str]
Key-value map of resource tags.
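A minimal TypeScript sketch of the optional schedule, configuration, tablePrefix, and tags inputs listed above. The database name, role ARN, and bucket path are hypothetical placeholders; substitute resources from your own program:

import * as aws from "@pulumi/aws";

const scheduled = new aws.glue.Crawler("scheduled", {
    databaseName: "example-db", // hypothetical existing Glue database
    role: "arn:aws:iam::123456789012:role/example-glue-role", // placeholder ARN
    s3Targets: [{ path: "s3://example-bucket/data" }],
    // Six-field AWS cron syntax: every day at 12:15 UTC.
    schedule: "cron(15 12 * * ? *)",
    // Configuration is passed as a JSON string; this grouping option
    // combines compatible schemas into a single catalog table.
    configuration: JSON.stringify({
        Version: 1.0,
        Grouping: { TableGroupingPolicy: "CombineCompatibleSchemas" },
    }),
    tablePrefix: "raw_",
    tags: { Environment: "dev" },
});

With these settings the crawler runs daily at 12:15 UTC and writes tables prefixed raw_ into example-db.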
Outputs
All input properties are implicitly available as output properties. Additionally, the Crawler resource produces the following output properties:
- Arn string
The ARN of the crawler.
- Id string
The provider-assigned unique ID for this managed resource.
- Arn string
The ARN of the crawler.
- Id string
The provider-assigned unique ID for this managed resource.
- arn string
The ARN of the crawler.
- id string
The provider-assigned unique ID for this managed resource.
- arn str
The ARN of the crawler.
- id str
The provider-assigned unique ID for this managed resource.
Look up an Existing Crawler Resource
Get an existing Crawler resource’s state with the given name, ID, and optional extra properties used to qualify the lookup.
public static get(name: string, id: Input<ID>, state?: CrawlerState, opts?: CustomResourceOptions): Crawler

static get(resource_name, id, opts=None, arn=None, catalog_targets=None, classifiers=None, configuration=None, database_name=None, description=None, dynamodb_targets=None, jdbc_targets=None, name=None, role=None, s3_targets=None, schedule=None, schema_change_policy=None, security_configuration=None, table_prefix=None, tags=None, __props__=None);

func GetCrawler(ctx *Context, name string, id IDInput, state *CrawlerState, opts ...ResourceOption) (*Crawler, error)

public static Crawler Get(string name, Input<string> id, CrawlerState? state, CustomResourceOptions? opts = null)

- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- resource_name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
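For instance, a minimal TypeScript sketch of adopting an existing crawler's state. The name "legacy-crawler" is a hypothetical placeholder; for this resource the provider ID is the crawler name:

import * as aws from "@pulumi/aws";

// Look up an already-provisioned crawler by its ID
// (for aws.glue.Crawler, the ID is the crawler name).
const legacy = aws.glue.Crawler.get("legacy", "legacy-crawler");

// The looked-up resource exposes the same outputs as a created one.
export const legacyArn = legacy.arn;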
The following state arguments are supported:
- Arn string
The ARN of the crawler.
- CatalogTargets List<CrawlerCatalogTargetArgs>
List of nested Glue catalog target arguments. See below.
- Classifiers List<string>
List of custom classifiers. By default, all AWS classifiers are included in a crawl, but these custom classifiers always override the default classifiers for a given classification.
- Configuration string
JSON string of configuration information.
- DatabaseName string
Glue database where results are written.
- Description string
Description of the crawler.
- DynamodbTargets List<CrawlerDynamodbTargetArgs>
List of nested DynamoDB target arguments. See below.
- JdbcTargets List<CrawlerJdbcTargetArgs>
List of nested JDBC target arguments. See below.
- Name string
Name of the crawler.
- Role string
The IAM role friendly name (including path without leading slash), or ARN of an IAM role, used by the crawler to access other resources.
- S3Targets List<CrawlerS3TargetArgs>
List of nested Amazon S3 target arguments. See below.
- Schedule string
A cron expression used to specify the schedule. For more information, see Time-Based Schedules for Jobs and Crawlers. For example, to run something every day at 12:15 UTC, you would specify: cron(15 12 * * ? *).
- SchemaChangePolicy CrawlerSchemaChangePolicyArgs
Policy for the crawler's update and deletion behavior.
- SecurityConfiguration string
The name of the Security Configuration to be used by the crawler.
- TablePrefix string
The table prefix used for catalog tables that are created.
- Tags Dictionary<string, string>
Key-value map of resource tags.
- Arn string
The ARN of the crawler.
- CatalogTargets []CrawlerCatalogTarget
List of nested Glue catalog target arguments. See below.
- Classifiers []string
List of custom classifiers. By default, all AWS classifiers are included in a crawl, but these custom classifiers always override the default classifiers for a given classification.
- Configuration string
JSON string of configuration information.
- DatabaseName string
Glue database where results are written.
- Description string
Description of the crawler.
- DynamodbTargets []CrawlerDynamodbTarget
List of nested DynamoDB target arguments. See below.
- JdbcTargets []CrawlerJdbcTarget
List of nested JDBC target arguments. See below.
- Name string
Name of the crawler.
- Role string
The IAM role friendly name (including path without leading slash), or ARN of an IAM role, used by the crawler to access other resources.
- S3Targets []CrawlerS3Target
List of nested Amazon S3 target arguments. See below.
- Schedule string
A cron expression used to specify the schedule. For more information, see Time-Based Schedules for Jobs and Crawlers. For example, to run something every day at 12:15 UTC, you would specify: cron(15 12 * * ? *).
- SchemaChangePolicy CrawlerSchemaChangePolicy
Policy for the crawler's update and deletion behavior.
- SecurityConfiguration string
The name of the Security Configuration to be used by the crawler.
- TablePrefix string
The table prefix used for catalog tables that are created.
- Tags map[string]string
Key-value map of resource tags.
- arn string
The ARN of the crawler.
- catalogTargets CrawlerCatalogTarget[]
List of nested Glue catalog target arguments. See below.
- classifiers string[]
List of custom classifiers. By default, all AWS classifiers are included in a crawl, but these custom classifiers always override the default classifiers for a given classification.
- configuration string
JSON string of configuration information.
- databaseName string
Glue database where results are written.
- description string
Description of the crawler.
- dynamodbTargets CrawlerDynamodbTarget[]
List of nested DynamoDB target arguments. See below.
- jdbcTargets CrawlerJdbcTarget[]
List of nested JDBC target arguments. See below.
- name string
Name of the crawler.
- role string
The IAM role friendly name (including path without leading slash), or ARN of an IAM role, used by the crawler to access other resources.
- s3Targets CrawlerS3Target[]
List of nested Amazon S3 target arguments. See below.
- schedule string
A cron expression used to specify the schedule. For more information, see Time-Based Schedules for Jobs and Crawlers. For example, to run something every day at 12:15 UTC, you would specify: cron(15 12 * * ? *).
- schemaChangePolicy CrawlerSchemaChangePolicy
Policy for the crawler's update and deletion behavior.
- securityConfiguration string
The name of the Security Configuration to be used by the crawler.
- tablePrefix string
The table prefix used for catalog tables that are created.
- tags {[key: string]: string}
Key-value map of resource tags.
- arn str
The ARN of the crawler.
- catalog_targets List[CrawlerCatalogTarget]
List of nested Glue catalog target arguments. See below.
- classifiers List[str]
List of custom classifiers. By default, all AWS classifiers are included in a crawl, but these custom classifiers always override the default classifiers for a given classification.
- configuration str
JSON string of configuration information.
- database_name str
Glue database where results are written.
- description str
Description of the crawler.
- dynamodb_targets List[CrawlerDynamodbTarget]
List of nested DynamoDB target arguments. See below.
- jdbc_targets List[CrawlerJdbcTarget]
List of nested JDBC target arguments. See below.
- name str
Name of the crawler.
- role str
The IAM role friendly name (including path without leading slash), or ARN of an IAM role, used by the crawler to access other resources.
- s3_targets List[CrawlerS3Target]
List of nested Amazon S3 target arguments. See below.
- schedule str
A cron expression used to specify the schedule. For more information, see Time-Based Schedules for Jobs and Crawlers. For example, to run something every day at 12:15 UTC, you would specify: cron(15 12 * * ? *).
- schema_change_policy Dict[CrawlerSchemaChangePolicy]
Policy for the crawler's update and deletion behavior.
- security_configuration str
The name of the Security Configuration to be used by the crawler.
- table_prefix str
The table prefix used for catalog tables that are created.
- tags Dict[str, str]
Key-value map of resource tags.
Supporting Types
CrawlerCatalogTarget
- DatabaseName string
The name of the Glue database to be synchronized.
- Tables List<string>
A list of catalog tables to be synchronized.
- DatabaseName string
The name of the Glue database to be synchronized.
- Tables []string
A list of catalog tables to be synchronized.
- databaseName string
The name of the Glue database to be synchronized.
- tables string[]
A list of catalog tables to be synchronized.
- database_name str
The name of the Glue database to be synchronized.
- tables List[str]
A list of catalog tables to be synchronized.
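A minimal TypeScript sketch of a catalog target, assuming the database and tables already exist in the Glue Data Catalog (all names here are hypothetical). AWS requires the LOG delete behavior when a crawler targets existing catalog tables:

import * as aws from "@pulumi/aws";

const catalogCrawler = new aws.glue.Crawler("catalog-crawler", {
    databaseName: "example-db", // hypothetical existing Glue database
    role: "arn:aws:iam::123456789012:role/example-glue-role", // placeholder ARN
    catalogTargets: [{
        databaseName: "example-db",
        tables: ["orders", "customers"], // hypothetical catalog tables
    }],
    // Catalog crawlers must log, not delete, removed objects.
    schemaChangePolicy: {
        deleteBehavior: "LOG",
    },
});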
CrawlerDynamodbTarget
- Path string
The name of the DynamoDB table to crawl.
- Path string
The name of the DynamoDB table to crawl.
- path string
The name of the DynamoDB table to crawl.
- path str
The name of the DynamoDB table to crawl.
CrawlerJdbcTarget
- ConnectionName string
The name of the connection to use to connect to the JDBC target.
- Path string
The path of the JDBC target.
- Exclusions List<string>
A list of glob patterns used to exclude from the crawl.
- ConnectionName string
The name of the connection to use to connect to the JDBC target.
- Path string
The path of the JDBC target.
- Exclusions []string
A list of glob patterns used to exclude from the crawl.
- connectionName string
The name of the connection to use to connect to the JDBC target.
- path string
The path of the JDBC target.
- exclusions string[]
A list of glob patterns used to exclude from the crawl.
- connection_name str
The name of the connection to use to connect to the JDBC target.
- path str
The path of the JDBC target.
- exclusions List[str]
A list of glob patterns used to exclude from the crawl.
CrawlerS3Target
- Path string
The path to the Amazon S3 target.
- Exclusions List<string>
A list of glob patterns used to exclude from the crawl.
- Path string
The path to the Amazon S3 target.
- Exclusions []string
A list of glob patterns used to exclude from the crawl.
- path string
The path to the Amazon S3 target.
- exclusions string[]
A list of glob patterns used to exclude from the crawl.
- path str
The path to the Amazon S3 target.
- exclusions List[str]
A list of glob patterns used to exclude from the crawl.
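The exclusions list takes glob patterns evaluated relative to the target path. A minimal TypeScript sketch with a hypothetical bucket layout:

import * as aws from "@pulumi/aws";

const s3Crawler = new aws.glue.Crawler("s3-crawler", {
    databaseName: "example-db", // hypothetical existing Glue database
    role: "arn:aws:iam::123456789012:role/example-glue-role", // placeholder ARN
    s3Targets: [{
        path: "s3://example-bucket/data",
        // Skip Spark/Hadoop temporary output, a scratch prefix,
        // and any .tmp files anywhere under the target path.
        exclusions: ["**/_temporary/**", "scratch/**", "**.tmp"],
    }],
});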
CrawlerSchemaChangePolicy
- DeleteBehavior string
The deletion behavior when the crawler finds a deleted object. Valid values: LOG, DELETE_FROM_DATABASE, or DEPRECATE_IN_DATABASE. Defaults to DEPRECATE_IN_DATABASE.
- UpdateBehavior string
The update behavior when the crawler finds a changed schema. Valid values: LOG or UPDATE_IN_DATABASE. Defaults to UPDATE_IN_DATABASE.
- DeleteBehavior string
The deletion behavior when the crawler finds a deleted object. Valid values: LOG, DELETE_FROM_DATABASE, or DEPRECATE_IN_DATABASE. Defaults to DEPRECATE_IN_DATABASE.
- UpdateBehavior string
The update behavior when the crawler finds a changed schema. Valid values: LOG or UPDATE_IN_DATABASE. Defaults to UPDATE_IN_DATABASE.
- deleteBehavior string
The deletion behavior when the crawler finds a deleted object. Valid values: LOG, DELETE_FROM_DATABASE, or DEPRECATE_IN_DATABASE. Defaults to DEPRECATE_IN_DATABASE.
- updateBehavior string
The update behavior when the crawler finds a changed schema. Valid values: LOG or UPDATE_IN_DATABASE. Defaults to UPDATE_IN_DATABASE.
- delete_behavior str
The deletion behavior when the crawler finds a deleted object. Valid values: LOG, DELETE_FROM_DATABASE, or DEPRECATE_IN_DATABASE. Defaults to DEPRECATE_IN_DATABASE.
- update_behavior str
The update behavior when the crawler finds a changed schema. Valid values: LOG or UPDATE_IN_DATABASE. Defaults to UPDATE_IN_DATABASE.
Package Details
- Repository
- https://github.com/pulumi/pulumi-aws
- License
- Apache-2.0
- Notes
- This Pulumi package is based on the aws Terraform Provider.