DAG: crawlers

schedule: @daily


Task Instance: indeed_in-crawl


Task Instance Details

Dependencies Blocking Task From Getting Scheduled
Dependency Reason
Task Instance State Task is in the 'success' state which is not a valid state for execution. The task must be cleared in order to be run.
Task Instance Attributes
Attribute Value
dag_id crawlers
duration None
end_date 2025-04-24 00:00:50.338760+00:00
execution_date 2025-04-23T00:00:00+00:00
executor_config {}
generate_command <function TaskInstance.generate_command at 0x7f2ad4931b70>
hostname 68bb9f6dae8e
is_premature False
job_id None
key ('crawlers', 'indeed_in-crawl', <Pendulum [2025-04-23T00:00:00+00:00]>, 2)
log <Logger airflow.task (INFO)>
log_filepath /usr/local/airflow/logs/crawlers/indeed_in-crawl/2025-04-23T00:00:00+00:00.log
log_url http://localhost:8080/admin/airflow/log?dag_id=crawlers&task_id=indeed_in-crawl&execution_date=2025-04-23T00%3A00%3A00%2B00%3A00
logger <Logger airflow.task (INFO)>
mark_success_url http://localhost:8080/admin/airflow/success?task_id=indeed_in-crawl&dag_id=crawlers&execution_date=2025-04-23T00%3A00%3A00%2B00%3A00&upstream=false&downstream=false
max_tries 2
metadata MetaData(bind=None)
next_try_number 2
operator None
pid 1897732
pool general
previous_ti <TaskInstance: crawlers.indeed_in-crawl 2025-04-22 00:00:00+00:00 [success]>
priority_weight 3
queue default
queued_dttm None
raw False
run_as_user None
start_date 2025-04-24 00:00:29.329895+00:00
state success
task <Task(DockerOperator): indeed_in-crawl>
task_id indeed_in-crawl
test_mode False
try_number 2
unixname airflow
Task Attributes
Attribute Value
adhoc False
api_version None
cli None
command python -m scrapy crawl indeed_in
container None
cpus 1.0
dag <DAG: crawlers>
dag_id crawlers
depends_on_past True
deps {<TIDep(Trigger Rule)>, <TIDep(Not In Retry Period)>, <TIDep(Previous Dagrun State)>}
dns None
dns_search None
docker_conn_id None
docker_url unix://var/run/docker.sock
downstream_list [<Task(DummyOperator): Wait>]
downstream_task_ids {'Wait'}
email ['airflow@airflow.com']
email_on_failure False
email_on_retry False
end_date None
environment {}
execution_timeout None
executor_config {}
force_pull False
image oc:airflow
inlets []
lineage_data None
log <Logger airflow.task.operators (INFO)>
logger <Logger airflow.task.operators (INFO)>
max_retry_delay None
mem_limit None
network_mode host
on_failure_callback None
on_retry_callback None
on_success_callback None
outlets []
owner airflow
params {}
pool general
priority_weight 1
priority_weight_total 3
queue default
resources {'cpus': {'_name': 'CPU', '_units_str': 'core(s)', '_qty': 1}, 'ram': {'_name': 'RAM', '_units_str': 'MB', '_qty': 512}, 'disk': {'_name': 'Disk', '_units_str': 'MB', '_qty': 512}, 'gpus': {'_name': 'GPU', '_units_str': 'gpu(s)', '_qty': 0}}
retries 2
retry_delay 0:30:00
retry_exponential_backoff False
run_as_user None
schedule_interval 0 0 * * *
shm_size None
sla None
start_date 2020-12-17T00:00:00+00:00
task_concurrency None
task_id indeed_in-crawl
task_type DockerOperator
template_ext ('.sh', '.bash')
template_fields ('command', 'environment')
tls_ca_cert None
tls_client_cert None
tls_client_key None
tls_hostname None
tls_ssl_version None
tmp_dir /tmp/airflow
trigger_rule all_success
ui_color #fff
ui_fgcolor #000
upstream_list [<Task(DummyOperator): Start>]
upstream_task_ids {'Start'}
user None
volumes []
wait_for_downstream False
weight_rule downstream
working_dir None
xcom_all False
xcom_push_flag False