Crawler

Manages a Glue Crawler. More information can be found in the AWS Glue Developer Guide.

Example Usage

DynamoDB Target

C#

using Pulumi;
using Aws = Pulumi.Aws;

class MyStack : Stack
{
    public MyStack()
    {
        var example = new Aws.Glue.Crawler("example", new Aws.Glue.CrawlerArgs
        {
            DatabaseName = aws_glue_catalog_database.Example.Name,
            DynamodbTargets = 
            {
                new Aws.Glue.Inputs.CrawlerDynamodbTargetArgs
                {
                    Path = "table-name",
                },
            },
            Role = aws_iam_role.Example.Arn,
        });
    }

}

Go

package main

import (
    "github.com/pulumi/pulumi-aws/sdk/v2/go/aws/glue"
    "github.com/pulumi/pulumi/sdk/v2/go/pulumi"
)

func main() {
    pulumi.Run(func(ctx *pulumi.Context) error {
        _, err := glue.NewCrawler(ctx, "example", &glue.CrawlerArgs{
            DatabaseName: pulumi.String(aws_glue_catalog_database.Example.Name),
            DynamodbTargets: glue.CrawlerDynamodbTargetArray{
                &glue.CrawlerDynamodbTargetArgs{
                    Path: pulumi.String("table-name"),
                },
            },
            Role: pulumi.String(aws_iam_role.Example.Arn),
        })
        if err != nil {
            return err
        }
        return nil
    })
}

Python

import pulumi
import pulumi_aws as aws

example = aws.glue.Crawler("example",
    database_name=aws_glue_catalog_database["example"]["name"],
    dynamodb_targets=[{
        "path": "table-name",
    }],
    role=aws_iam_role["example"]["arn"])

TypeScript

import * as pulumi from "@pulumi/pulumi";
import * as aws from "@pulumi/aws";

const example = new aws.glue.Crawler("example", {
    databaseName: aws_glue_catalog_database_example.name,
    dynamodbTargets: [{
        path: "table-name",
    }],
    role: aws_iam_role_example.arn,
});

JDBC Target

C#

using Pulumi;
using Aws = Pulumi.Aws;

class MyStack : Stack
{
    public MyStack()
    {
        var example = new Aws.Glue.Crawler("example", new Aws.Glue.CrawlerArgs
        {
            DatabaseName = aws_glue_catalog_database.Example.Name,
            JdbcTargets = 
            {
                new Aws.Glue.Inputs.CrawlerJdbcTargetArgs
                {
                    ConnectionName = aws_glue_connection.Example.Name,
                    Path = "database-name/%",
                },
            },
            Role = aws_iam_role.Example.Arn,
        });
    }

}

Go

package main

import (
    "fmt"

    "github.com/pulumi/pulumi-aws/sdk/v2/go/aws/glue"
    "github.com/pulumi/pulumi/sdk/v2/go/pulumi"
)

func main() {
    pulumi.Run(func(ctx *pulumi.Context) error {
        _, err := glue.NewCrawler(ctx, "example", &glue.CrawlerArgs{
            DatabaseName: pulumi.String(aws_glue_catalog_database.Example.Name),
            JdbcTargets: glue.CrawlerJdbcTargetArray{
                &glue.CrawlerJdbcTargetArgs{
                    ConnectionName: pulumi.String(aws_glue_connection.Example.Name),
                    Path:           pulumi.String(fmt.Sprintf("%v%v", "database-name/", "%")),
                },
            },
            Role: pulumi.String(aws_iam_role.Example.Arn),
        })
        if err != nil {
            return err
        }
        return nil
    })
}

Python

import pulumi
import pulumi_aws as aws

example = aws.glue.Crawler("example",
    database_name=aws_glue_catalog_database["example"]["name"],
    jdbc_targets=[{
        "connectionName": aws_glue_connection["example"]["name"],
        "path": "database-name/%",
    }],
    role=aws_iam_role["example"]["arn"])

TypeScript

import * as pulumi from "@pulumi/pulumi";
import * as aws from "@pulumi/aws";

const example = new aws.glue.Crawler("example", {
    databaseName: aws_glue_catalog_database_example.name,
    jdbcTargets: [{
        connectionName: aws_glue_connection_example.name,
        path: "database-name/%",
    }],
    role: aws_iam_role_example.arn,
});

S3 Target

C#

using Pulumi;
using Aws = Pulumi.Aws;

class MyStack : Stack
{
    public MyStack()
    {
        var example = new Aws.Glue.Crawler("example", new Aws.Glue.CrawlerArgs
        {
            DatabaseName = aws_glue_catalog_database.Example.Name,
            Role = aws_iam_role.Example.Arn,
            S3Targets = 
            {
                new Aws.Glue.Inputs.CrawlerS3TargetArgs
                {
                    Path = $"s3://{aws_s3_bucket.Example.Bucket}",
                },
            },
        });
    }

}

Go

package main

import (
    "fmt"

    "github.com/pulumi/pulumi-aws/sdk/v2/go/aws/glue"
    "github.com/pulumi/pulumi/sdk/v2/go/pulumi"
)

func main() {
    pulumi.Run(func(ctx *pulumi.Context) error {
        _, err := glue.NewCrawler(ctx, "example", &glue.CrawlerArgs{
            DatabaseName: pulumi.String(aws_glue_catalog_database.Example.Name),
            Role:         pulumi.String(aws_iam_role.Example.Arn),
            S3Targets: glue.CrawlerS3TargetArray{
                &glue.CrawlerS3TargetArgs{
                    Path: pulumi.String(fmt.Sprintf("%v%v", "s3://", aws_s3_bucket.Example.Bucket)),
                },
            },
        })
        if err != nil {
            return err
        }
        return nil
    })
}

Python

import pulumi
import pulumi_aws as aws

example = aws.glue.Crawler("example",
    database_name=aws_glue_catalog_database["example"]["name"],
    role=aws_iam_role["example"]["arn"],
    s3_targets=[{
        "path": f"s3://{aws_s3_bucket['example']['bucket']}",
    }])

TypeScript

import * as pulumi from "@pulumi/pulumi";
import * as aws from "@pulumi/aws";

const example = new aws.glue.Crawler("example", {
    databaseName: aws_glue_catalog_database_example.name,
    role: aws_iam_role_example.arn,
    s3Targets: [{
        path: pulumi.interpolate`s3://${aws_s3_bucket_example.bucket}`,
    }],
});

Create a Crawler Resource

new Crawler(name: string, args: CrawlerArgs, opts?: CustomResourceOptions);
def Crawler(resource_name, opts=None, catalog_targets=None, classifiers=None, configuration=None, database_name=None, description=None, dynamodb_targets=None, jdbc_targets=None, name=None, role=None, s3_targets=None, schedule=None, schema_change_policy=None, security_configuration=None, table_prefix=None, tags=None, __props__=None)
func NewCrawler(ctx *Context, name string, args CrawlerArgs, opts ...ResourceOption) (*Crawler, error)
public Crawler(string name, CrawlerArgs args, CustomResourceOptions? opts = null)

TypeScript

name string
The unique name of the resource.
args CrawlerArgs
The arguments to resource properties.
opts CustomResourceOptions
Bag of options to control resource's behavior.

Python

resource_name str
The unique name of the resource.
opts ResourceOptions
A bag of options that control this resource's behavior.

Go

ctx Context
Context object for the current deployment.
name string
The unique name of the resource.
args CrawlerArgs
The arguments to resource properties.
opts ResourceOption
Bag of options to control resource's behavior.

C#

name string
The unique name of the resource.
args CrawlerArgs
The arguments to resource properties.
opts CustomResourceOptions
Bag of options to control resource's behavior.

Crawler Resource Properties

To learn more about resource properties and how to use them, see Inputs and Outputs in the Programming Model docs.

Inputs

The Crawler resource accepts the following input properties:

C#

DatabaseName string

Glue database where results are written.

Role string

The IAM role friendly name (including path without leading slash), or ARN of an IAM role, used by the crawler to access other resources.

CatalogTargets List<CrawlerCatalogTargetArgs>

List of nested Glue Catalog target arguments. See below.

Classifiers List<string>

List of custom classifiers. By default, all AWS classifiers are included in a crawl, but these custom classifiers always override the default classifiers for a given classification.

Configuration string

JSON string of configuration information.

Description string

Description of the crawler.

DynamodbTargets List<CrawlerDynamodbTargetArgs>

List of nested DynamoDB target arguments. See below.

JdbcTargets List<CrawlerJdbcTargetArgs>

List of nested JDBC target arguments. See below.

Name string

Name of the crawler.

S3Targets List<CrawlerS3TargetArgs>

List of nested Amazon S3 target arguments. See below.

Schedule string

A cron expression used to specify the schedule. For more information, see Time-Based Schedules for Jobs and Crawlers. For example, to run something every day at 12:15 UTC, you would specify: cron(15 12 * * ? *).

SchemaChangePolicy CrawlerSchemaChangePolicyArgs

Policy for the crawler’s update and deletion behavior.

SecurityConfiguration string

The name of the Security Configuration to be used by the crawler.

TablePrefix string

The table prefix used for catalog tables that are created.

Tags Dictionary<string, string>

Key-value map of resource tags

Go

DatabaseName string

Glue database where results are written.

Role string

The IAM role friendly name (including path without leading slash), or ARN of an IAM role, used by the crawler to access other resources.

CatalogTargets []CrawlerCatalogTarget

List of nested Glue Catalog target arguments. See below.

Classifiers []string

List of custom classifiers. By default, all AWS classifiers are included in a crawl, but these custom classifiers always override the default classifiers for a given classification.

Configuration string

JSON string of configuration information.

Description string

Description of the crawler.

DynamodbTargets []CrawlerDynamodbTarget

List of nested DynamoDB target arguments. See below.

JdbcTargets []CrawlerJdbcTarget

List of nested JDBC target arguments. See below.

Name string

Name of the crawler.

S3Targets []CrawlerS3Target

List of nested Amazon S3 target arguments. See below.

Schedule string

A cron expression used to specify the schedule. For more information, see Time-Based Schedules for Jobs and Crawlers. For example, to run something every day at 12:15 UTC, you would specify: cron(15 12 * * ? *).

SchemaChangePolicy CrawlerSchemaChangePolicy

Policy for the crawler’s update and deletion behavior.

SecurityConfiguration string

The name of the Security Configuration to be used by the crawler.

TablePrefix string

The table prefix used for catalog tables that are created.

Tags map[string]string

Key-value map of resource tags

TypeScript

databaseName string

Glue database where results are written.

role string

The IAM role friendly name (including path without leading slash), or ARN of an IAM role, used by the crawler to access other resources.

catalogTargets CrawlerCatalogTarget[]

List of nested Glue Catalog target arguments. See below.

classifiers string[]

List of custom classifiers. By default, all AWS classifiers are included in a crawl, but these custom classifiers always override the default classifiers for a given classification.

configuration string

JSON string of configuration information.

description string

Description of the crawler.

dynamodbTargets CrawlerDynamodbTarget[]

List of nested DynamoDB target arguments. See below.

jdbcTargets CrawlerJdbcTarget[]

List of nested JDBC target arguments. See below.

name string

Name of the crawler.

s3Targets CrawlerS3Target[]

List of nested Amazon S3 target arguments. See below.

schedule string

A cron expression used to specify the schedule. For more information, see Time-Based Schedules for Jobs and Crawlers. For example, to run something every day at 12:15 UTC, you would specify: cron(15 12 * * ? *).

schemaChangePolicy CrawlerSchemaChangePolicy

Policy for the crawler’s update and deletion behavior.

securityConfiguration string

The name of the Security Configuration to be used by the crawler.

tablePrefix string

The table prefix used for catalog tables that are created.

tags {[key: string]: string}

Key-value map of resource tags

Python

database_name str

Glue database where results are written.

role str

The IAM role friendly name (including path without leading slash), or ARN of an IAM role, used by the crawler to access other resources.

catalog_targets List[CrawlerCatalogTarget]

List of nested Glue Catalog target arguments. See below.

classifiers List[str]

List of custom classifiers. By default, all AWS classifiers are included in a crawl, but these custom classifiers always override the default classifiers for a given classification.

configuration str

JSON string of configuration information.

description str

Description of the crawler.

dynamodb_targets List[CrawlerDynamodbTarget]

List of nested DynamoDB target arguments. See below.

jdbc_targets List[CrawlerJdbcTarget]

List of nested JDBC target arguments. See below.

name str

Name of the crawler.

s3_targets List[CrawlerS3Target]

List of nested Amazon S3 target arguments. See below.

schedule str

A cron expression used to specify the schedule. For more information, see Time-Based Schedules for Jobs and Crawlers. For example, to run something every day at 12:15 UTC, you would specify: cron(15 12 * * ? *).

schema_change_policy Dict[CrawlerSchemaChangePolicy]

Policy for the crawler’s update and deletion behavior.

security_configuration str

The name of the Security Configuration to be used by the crawler.

table_prefix str

The table prefix used for catalog tables that are created.

tags Dict[str, str]

Key-value map of resource tags
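
To show how several of these inputs combine, here is a minimal TypeScript sketch. The database name, role ARN, bucket path, prefix, and tag values are illustrative placeholders, and the configuration JSON follows the table-grouping option described in the AWS Glue Developer Guide:

import * as aws from "@pulumi/aws";

// All identifiers below are placeholders, not resources from the examples above.
const scheduled = new aws.glue.Crawler("scheduled", {
    databaseName: "my-glue-database",
    role: "arn:aws:iam::123456789012:role/my-glue-role",
    s3Targets: [{
        path: "s3://my-bucket/data",
    }],
    // Run every day at 12:15 UTC.
    schedule: "cron(15 12 * * ? *)",
    // Configuration must be a JSON string; JSON.stringify keeps it well-formed.
    configuration: JSON.stringify({
        Version: 1.0,
        Grouping: {
            TableGroupingPolicy: "CombineCompatibleSchemas",
        },
    }),
    tablePrefix: "raw_",
    tags: {
        Environment: "dev",
    },
});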

Outputs

All input properties are implicitly available as output properties. Additionally, the Crawler resource produces the following output properties:

C#

Arn string

The ARN of the crawler

Id string
The provider-assigned unique ID for this managed resource.

Go

Arn string

The ARN of the crawler

Id string
The provider-assigned unique ID for this managed resource.

TypeScript

arn string

The ARN of the crawler

id string
The provider-assigned unique ID for this managed resource.

Python

arn str

The ARN of the crawler

id str
The provider-assigned unique ID for this managed resource.

Look up an Existing Crawler Resource

Get an existing Crawler resource’s state with the given name, ID, and optional extra properties used to qualify the lookup.

public static get(name: string, id: Input<ID>, state?: CrawlerState, opts?: CustomResourceOptions): Crawler
static get(resource_name, id, opts=None, arn=None, catalog_targets=None, classifiers=None, configuration=None, database_name=None, description=None, dynamodb_targets=None, jdbc_targets=None, name=None, role=None, s3_targets=None, schedule=None, schema_change_policy=None, security_configuration=None, table_prefix=None, tags=None, __props__=None)
func GetCrawler(ctx *Context, name string, id IDInput, state *CrawlerState, opts ...ResourceOption) (*Crawler, error)
public static Crawler Get(string name, Input<string> id, CrawlerState? state, CustomResourceOptions? opts = null)

TypeScript

name
The unique name of the resulting resource.
id
The unique provider ID of the resource to lookup.
state
Any extra arguments used during the lookup.
opts
A bag of options that control this resource's behavior.

Python

resource_name
The unique name of the resulting resource.
id
The unique provider ID of the resource to lookup.

Go

name
The unique name of the resulting resource.
id
The unique provider ID of the resource to lookup.
state
Any extra arguments used during the lookup.
opts
A bag of options that control this resource's behavior.

C#

name
The unique name of the resulting resource.
id
The unique provider ID of the resource to lookup.
state
Any extra arguments used during the lookup.
opts
A bag of options that control this resource's behavior.
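
As a sketch, the TypeScript overload above can bring an existing crawler into a program. "my-crawler" is a placeholder for the provider ID, which for Glue crawlers is the crawler name:

import * as aws from "@pulumi/aws";

// Look up an existing crawler by logical name and provider ID.
const existing = aws.glue.Crawler.get("existing", "my-crawler");

// The returned resource exposes the same outputs as a created one, e.g. its ARN.
export const existingArn = existing.arn;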

The following state arguments are supported:

C#

Arn string

The ARN of the crawler

CatalogTargets List<CrawlerCatalogTargetArgs>

List of nested Glue Catalog target arguments. See below.

Classifiers List<string>

List of custom classifiers. By default, all AWS classifiers are included in a crawl, but these custom classifiers always override the default classifiers for a given classification.

Configuration string

JSON string of configuration information.

DatabaseName string

Glue database where results are written.

Description string

Description of the crawler.

DynamodbTargets List<CrawlerDynamodbTargetArgs>

List of nested DynamoDB target arguments. See below.

JdbcTargets List<CrawlerJdbcTargetArgs>

List of nested JDBC target arguments. See below.

Name string

Name of the crawler.

Role string

The IAM role friendly name (including path without leading slash), or ARN of an IAM role, used by the crawler to access other resources.

S3Targets List<CrawlerS3TargetArgs>

List of nested Amazon S3 target arguments. See below.

Schedule string

A cron expression used to specify the schedule. For more information, see Time-Based Schedules for Jobs and Crawlers. For example, to run something every day at 12:15 UTC, you would specify: cron(15 12 * * ? *).

SchemaChangePolicy CrawlerSchemaChangePolicyArgs

Policy for the crawler’s update and deletion behavior.

SecurityConfiguration string

The name of the Security Configuration to be used by the crawler.

TablePrefix string

The table prefix used for catalog tables that are created.

Tags Dictionary<string, string>

Key-value map of resource tags

Go

Arn string

The ARN of the crawler

CatalogTargets []CrawlerCatalogTarget

List of nested Glue Catalog target arguments. See below.

Classifiers []string

List of custom classifiers. By default, all AWS classifiers are included in a crawl, but these custom classifiers always override the default classifiers for a given classification.

Configuration string

JSON string of configuration information.

DatabaseName string

Glue database where results are written.

Description string

Description of the crawler.

DynamodbTargets []CrawlerDynamodbTarget

List of nested DynamoDB target arguments. See below.

JdbcTargets []CrawlerJdbcTarget

List of nested JDBC target arguments. See below.

Name string

Name of the crawler.

Role string

The IAM role friendly name (including path without leading slash), or ARN of an IAM role, used by the crawler to access other resources.

S3Targets []CrawlerS3Target

List of nested Amazon S3 target arguments. See below.

Schedule string

A cron expression used to specify the schedule. For more information, see Time-Based Schedules for Jobs and Crawlers. For example, to run something every day at 12:15 UTC, you would specify: cron(15 12 * * ? *).

SchemaChangePolicy CrawlerSchemaChangePolicy

Policy for the crawler’s update and deletion behavior.

SecurityConfiguration string

The name of the Security Configuration to be used by the crawler.

TablePrefix string

The table prefix used for catalog tables that are created.

Tags map[string]string

Key-value map of resource tags

TypeScript

arn string

The ARN of the crawler

catalogTargets CrawlerCatalogTarget[]

List of nested Glue Catalog target arguments. See below.

classifiers string[]

List of custom classifiers. By default, all AWS classifiers are included in a crawl, but these custom classifiers always override the default classifiers for a given classification.

configuration string

JSON string of configuration information.

databaseName string

Glue database where results are written.

description string

Description of the crawler.

dynamodbTargets CrawlerDynamodbTarget[]

List of nested DynamoDB target arguments. See below.

jdbcTargets CrawlerJdbcTarget[]

List of nested JDBC target arguments. See below.

name string

Name of the crawler.

role string

The IAM role friendly name (including path without leading slash), or ARN of an IAM role, used by the crawler to access other resources.

s3Targets CrawlerS3Target[]

List of nested Amazon S3 target arguments. See below.

schedule string

A cron expression used to specify the schedule. For more information, see Time-Based Schedules for Jobs and Crawlers. For example, to run something every day at 12:15 UTC, you would specify: cron(15 12 * * ? *).

schemaChangePolicy CrawlerSchemaChangePolicy

Policy for the crawler’s update and deletion behavior.

securityConfiguration string

The name of the Security Configuration to be used by the crawler.

tablePrefix string

The table prefix used for catalog tables that are created.

tags {[key: string]: string}

Key-value map of resource tags

Python

arn str

The ARN of the crawler

catalog_targets List[CrawlerCatalogTarget]

List of nested Glue Catalog target arguments. See below.

classifiers List[str]

List of custom classifiers. By default, all AWS classifiers are included in a crawl, but these custom classifiers always override the default classifiers for a given classification.

configuration str

JSON string of configuration information.

database_name str

Glue database where results are written.

description str

Description of the crawler.

dynamodb_targets List[CrawlerDynamodbTarget]

List of nested DynamoDB target arguments. See below.

jdbc_targets List[CrawlerJdbcTarget]

List of nested JDBC target arguments. See below.

name str

Name of the crawler.

role str

The IAM role friendly name (including path without leading slash), or ARN of an IAM role, used by the crawler to access other resources.

s3_targets List[CrawlerS3Target]

List of nested Amazon S3 target arguments. See below.

schedule str

A cron expression used to specify the schedule. For more information, see Time-Based Schedules for Jobs and Crawlers. For example, to run something every day at 12:15 UTC, you would specify: cron(15 12 * * ? *).

schema_change_policy Dict[CrawlerSchemaChangePolicy]

Policy for the crawler’s update and deletion behavior.

security_configuration str

The name of the Security Configuration to be used by the crawler.

table_prefix str

The table prefix used for catalog tables that are created.

tags Dict[str, str]

Key-value map of resource tags

Supporting Types

CrawlerCatalogTarget

See the input and output API doc for this type.

C#

DatabaseName string

The name of the Glue database to be synchronized.

Tables List<string>

A list of catalog tables to be synchronized.

Go

DatabaseName string

The name of the Glue database to be synchronized.

Tables []string

A list of catalog tables to be synchronized.

TypeScript

databaseName string

The name of the Glue database to be synchronized.

tables string[]

A list of catalog tables to be synchronized.

Python

database_name str

The name of the Glue database to be synchronized.

tables List[str]

A list of catalog tables to be synchronized.
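
A hedged TypeScript sketch of a catalog target follows; the database, table, and role names are placeholders. Note that AWS expects catalog-target crawlers to use the LOG delete behavior, so one is set explicitly:

import * as aws from "@pulumi/aws";

// Placeholder names throughout.
const catalogCrawler = new aws.glue.Crawler("catalogCrawler", {
    databaseName: "my-glue-database",
    role: "arn:aws:iam::123456789012:role/my-glue-role",
    catalogTargets: [{
        databaseName: "my-glue-database",
        tables: ["my_table"],
    }],
    // Catalog targets only support the LOG delete behavior.
    schemaChangePolicy: {
        deleteBehavior: "LOG",
    },
});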

CrawlerDynamodbTarget

See the input and output API doc for this type.

C#

Path string

The name of the DynamoDB table to crawl.

Go

Path string

The name of the DynamoDB table to crawl.

TypeScript

path string

The name of the DynamoDB table to crawl.

Python

path str

The name of the DynamoDB table to crawl.

CrawlerJdbcTarget

See the input and output API doc for this type.

C#

ConnectionName string

The name of the connection to use to connect to the JDBC target.

Path string

The path of the JDBC target.

Exclusions List<string>

A list of glob patterns used to exclude from the crawl.

Go

ConnectionName string

The name of the connection to use to connect to the JDBC target.

Path string

The path of the JDBC target.

Exclusions []string

A list of glob patterns used to exclude from the crawl.

TypeScript

connectionName string

The name of the connection to use to connect to the JDBC target.

path string

The path of the JDBC target.

exclusions string[]

A list of glob patterns used to exclude from the crawl.

Python

connection_name str

The name of the connection to use to connect to the JDBC target.

path str

The path of the JDBC target.

exclusions List[str]

A list of glob patterns used to exclude from the crawl.
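
For illustration, a TypeScript sketch of a JDBC target that crawls every table under a schema while excluding some by glob pattern; the connection, database, and role names are placeholders:

import * as aws from "@pulumi/aws";

const jdbcCrawler = new aws.glue.Crawler("jdbcCrawler", {
    databaseName: "my-glue-database",
    role: "arn:aws:iam::123456789012:role/my-glue-role",
    jdbcTargets: [{
        connectionName: "my-jdbc-connection",
        // The trailing % matches every table under the schema.
        path: "database-name/schema-name/%",
        // Glob patterns for tables to skip during the crawl.
        exclusions: ["database-name/schema-name/tmp_*"],
    }],
});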

CrawlerS3Target

See the input and output API doc for this type.

C#

Path string

The path to the Amazon S3 target.

Exclusions List<string>

A list of glob patterns used to exclude from the crawl.

Go

Path string

The path to the Amazon S3 target.

Exclusions []string

A list of glob patterns used to exclude from the crawl.

TypeScript

path string

The path to the Amazon S3 target.

exclusions string[]

A list of glob patterns used to exclude from the crawl.

Python

path str

The path to the Amazon S3 target.

exclusions List[str]

A list of glob patterns used to exclude from the crawl.
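
A minimal TypeScript sketch of an S3 target with exclusions; the bucket name, role ARN, and glob patterns are placeholders:

import * as aws from "@pulumi/aws";

const s3Crawler = new aws.glue.Crawler("s3Crawler", {
    databaseName: "my-glue-database",
    role: "arn:aws:iam::123456789012:role/my-glue-role",
    s3Targets: [{
        path: "s3://my-bucket/data",
        // Skip temporary output and metadata files anywhere under the path.
        exclusions: ["**/_temporary/**", "**/*.metadata"],
    }],
});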

CrawlerSchemaChangePolicy

See the input and output API doc for this type.

C#

DeleteBehavior string

The deletion behavior when the crawler finds a deleted object. Valid values: LOG, DELETE_FROM_DATABASE, or DEPRECATE_IN_DATABASE. Defaults to DEPRECATE_IN_DATABASE.

UpdateBehavior string

The update behavior when the crawler finds a changed schema. Valid values: LOG or UPDATE_IN_DATABASE. Defaults to UPDATE_IN_DATABASE.

Go

DeleteBehavior string

The deletion behavior when the crawler finds a deleted object. Valid values: LOG, DELETE_FROM_DATABASE, or DEPRECATE_IN_DATABASE. Defaults to DEPRECATE_IN_DATABASE.

UpdateBehavior string

The update behavior when the crawler finds a changed schema. Valid values: LOG or UPDATE_IN_DATABASE. Defaults to UPDATE_IN_DATABASE.

TypeScript

deleteBehavior string

The deletion behavior when the crawler finds a deleted object. Valid values: LOG, DELETE_FROM_DATABASE, or DEPRECATE_IN_DATABASE. Defaults to DEPRECATE_IN_DATABASE.

updateBehavior string

The update behavior when the crawler finds a changed schema. Valid values: LOG or UPDATE_IN_DATABASE. Defaults to UPDATE_IN_DATABASE.

Python

delete_behavior str

The deletion behavior when the crawler finds a deleted object. Valid values: LOG, DELETE_FROM_DATABASE, or DEPRECATE_IN_DATABASE. Defaults to DEPRECATE_IN_DATABASE.

update_behavior str

The update behavior when the crawler finds a changed schema. Valid values: LOG or UPDATE_IN_DATABASE. Defaults to UPDATE_IN_DATABASE.
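
As a sketch, a non-default schema change policy in TypeScript; the resource names, bucket, and role ARN are placeholders:

import * as aws from "@pulumi/aws";

const loggingCrawler = new aws.glue.Crawler("loggingCrawler", {
    databaseName: "my-glue-database",
    role: "arn:aws:iam::123456789012:role/my-glue-role",
    s3Targets: [{
        path: "s3://my-bucket/data",
    }],
    schemaChangePolicy: {
        // Log deleted objects instead of deprecating their catalog entries.
        deleteBehavior: "LOG",
        // Keep the default of updating table definitions in place.
        updateBehavior: "UPDATE_IN_DATABASE",
    },
});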

Package Details

Repository
https://github.com/pulumi/pulumi-aws
License
Apache-2.0
Notes
This Pulumi package is based on the aws Terraform Provider.