DAG: crawlers

schedule: @daily


Task Instance: Start


Task Instance Details

Dependencies Blocking Task From Getting Scheduled
Dependency Reason
Task Instance State Task is in the 'success' state which is not a valid state for execution. The task must be cleared in order to be run.
Task Instance Attributes
Attribute Value
dag_id crawlers
duration None
end_date 2025-04-24 00:00:04.472451+00:00
execution_date 2025-04-23T00:00:00+00:00
executor_config {}
generate_command <function TaskInstance.generate_command at 0x7f2ad4931b70>
hostname 68bb9f6dae8e
is_premature False
job_id None
key ('crawlers', 'Start', <Pendulum [2025-04-23T00:00:00+00:00]>, 2)
log <Logger airflow.task (INFO)>
log_filepath /usr/local/airflow/logs/crawlers/Start/2025-04-23T00:00:00+00:00.log
log_url http://localhost:8080/admin/airflow/log?dag_id=crawlers&task_id=Start&execution_date=2025-04-23T00%3A00%3A00%2B00%3A00
logger <Logger airflow.task (INFO)>
mark_success_url http://localhost:8080/admin/airflow/success?task_id=Start&dag_id=crawlers&execution_date=2025-04-23T00%3A00%3A00%2B00%3A00&upstream=false&downstream=false
max_tries 2
metadata MetaData(bind=None)
next_try_number 2
operator None
pid 1897407
pool general
previous_ti <TaskInstance: crawlers.Start 2025-04-22 00:00:00+00:00 [success]>
priority_weight 45
queue default
queued_dttm None
raw False
run_as_user None
start_date 2025-04-24 00:00:03.294376+00:00
state success
task <Task(DummyOperator): Start>
task_id Start
test_mode False
try_number 2
unixname airflow
Task Attributes
Attribute Value
adhoc False
dag <DAG: crawlers>
dag_id crawlers
depends_on_past True
deps {<TIDep(Trigger Rule)>, <TIDep(Not In Retry Period)>, <TIDep(Previous Dagrun State)>}
downstream_list [<Task(DockerOperator): indeed_tw-crawl>, <Task(DockerOperator): indeed_es-crawl>, <Task(DockerOperator): indeed_malaysia-crawl>, <Task(DockerOperator): indeed_fi-crawl>, <Task(DockerOperator): indeed_korea-crawl>, <Task(DockerOperator): indeed_sa-crawl>, <Task(DockerOperator): indeed_fr-crawl>, <Task(DockerOperator): seek_au-crawl>, <Task(DockerOperator): indeed_in-crawl>, <Task(DockerOperator): indeed_nl-crawl>, <Task(DockerOperator): optioncar-crawl>, <Task(DockerOperator): crypto-crawl>, <Task(DockerOperator): indeed_ro-crawl>, <Task(DockerOperator): indeed_china-crawl>, <Task(DockerOperator): indeed_au-crawl>, <Task(DockerOperator): indeed_brasil-crawl>, <Task(DockerOperator): indeed_it-crawl>, <Task(DockerOperator): polemployee-crawl>, <Task(DockerOperator): dejobs-crawl>, <Task(DockerOperator): indeed_mx-crawl>, <Task(DockerOperator): whoishiring-crawl>, <Task(DockerOperator): glassdoor-crawl>, <Task(DockerOperator): indeed-crawl>, <Task(DockerOperator): indeed_pk-crawl>, <Task(DockerOperator): indeed_ua-crawl>, <Task(DockerOperator): indeed_gr-crawl>, <Task(DockerOperator): reeduk-crawl>, <Task(DockerOperator): careerbuilder-crawl>, <Task(DockerOperator): dice-crawl>, <Task(DockerOperator): indeed_ru-crawl>, <Task(DockerOperator): indeed_uk-crawl>, <Task(DockerOperator): indeed_eg-crawl>, <Task(DockerOperator): opcionempleo-crawl>, <Task(DockerOperator): flexjobs-crawl>, <Task(DockerOperator): indeed_ie-crawl>, <Task(DockerOperator): indeed_pt-crawl>, <Task(DockerOperator): snagajobs-crawl>, <Task(DockerOperator): indeed_ar-crawl>, <Task(DockerOperator): indeed_pl-crawl>, <Task(DockerOperator): indeed_se-crawl>, <Task(DockerOperator): ladders-crawl>, <Task(DockerOperator): indeed_canada-crawl>]
downstream_task_ids {'indeed_tw-crawl', 'indeed_es-crawl', 'indeed_malaysia-crawl', 'indeed_fi-crawl', 'indeed_korea-crawl', 'indeed_sa-crawl', 'indeed_fr-crawl', 'seek_au-crawl', 'indeed_in-crawl', 'indeed_nl-crawl', 'optioncar-crawl', 'crypto-crawl', 'indeed_ro-crawl', 'indeed_china-crawl', 'indeed_au-crawl', 'indeed_brasil-crawl', 'indeed_it-crawl', 'polemployee-crawl', 'dejobs-crawl', 'indeed_mx-crawl', 'whoishiring-crawl', 'glassdoor-crawl', 'indeed-crawl', 'indeed_pk-crawl', 'indeed_ua-crawl', 'indeed_gr-crawl', 'reeduk-crawl', 'careerbuilder-crawl', 'dice-crawl', 'indeed_ru-crawl', 'indeed_uk-crawl', 'indeed_eg-crawl', 'opcionempleo-crawl', 'flexjobs-crawl', 'indeed_ie-crawl', 'indeed_pt-crawl', 'snagajobs-crawl', 'indeed_ar-crawl', 'indeed_pl-crawl', 'indeed_se-crawl', 'ladders-crawl', 'indeed_canada-crawl'}
email ['airflow@airflow.com']
email_on_failure False
email_on_retry False
end_date None
execution_timeout None
executor_config {}
inlets []
lineage_data None
log <Logger airflow.task.operators (INFO)>
logger <Logger airflow.task.operators (INFO)>
max_retry_delay None
on_failure_callback None
on_retry_callback None
on_success_callback None
outlets []
owner airflow
params {}
pool general
priority_weight 1
priority_weight_total 45
queue default
resources {'cpus': {'_name': 'CPU', '_units_str': 'core(s)', '_qty': 1}, 'ram': {'_name': 'RAM', '_units_str': 'MB', '_qty': 512}, 'disk': {'_name': 'Disk', '_units_str': 'MB', '_qty': 512}, 'gpus': {'_name': 'GPU', '_units_str': 'gpu(s)', '_qty': 0}}
retries 2
retry_delay 0:30:00
retry_exponential_backoff False
run_as_user None
schedule_interval 0 0 * * *
sla None
start_date 2020-12-17T00:00:00+00:00
task_concurrency None
task_id Start
task_type DummyOperator
template_ext []
template_fields ()
trigger_rule all_success
ui_color #e8f7e4
ui_fgcolor #000
upstream_list []
upstream_task_ids set()
wait_for_downstream False
weight_rule downstream