Dependency | Reason |
---|---|
Previous Dagrun State | depends_on_past is true for this task, but the previous task instance |
Dag Not Paused | Task's DAG 'crawlers_hourly' is paused. |
Trigger Rule | Task's trigger rule 'all_success' requires all upstream tasks to have succeeded, but found 1 non-success(es). upstream_tasks_state={'total': 1, 'successes': 0, 'skipped': 0, 'failed': 0, 'upstream_failed': 0, 'done': 0}, upstream_task_ids={'Start'} |
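Taken together, the three failed checks explain why `linkedin-crawl` is stuck: the DAG is paused, the previous run of this same task never finished successfully (so `depends_on_past` blocks the new one), and the upstream `Start` task has not succeeded yet. The pause flag can be cleared from the UI toggle or with `airflow unpause crawlers_hourly`; purely as an illustration, the same flag can also be flipped through the metadata database. This is a minimal sketch against the Airflow 1.10-era ORM, not a prescribed procedure:

```python
# Sketch only: unpause 'crawlers_hourly' by flipping DagModel.is_paused in the
# metadata database (assumes Airflow 1.10.x; the UI toggle or the
# `airflow unpause` CLI does the same thing).
from airflow.models import DagModel
from airflow.utils.db import provide_session


@provide_session
def unpause_dag(dag_id, session=None):
    dag_model = session.query(DagModel).filter(DagModel.dag_id == dag_id).one()
    dag_model.is_paused = False
    session.merge(dag_model)
    session.commit()


unpause_dag("crawlers_hourly")
```

The first attribute table below describes this particular task instance (the run that is being blocked).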
Attribute | Value |
---|---|
dag_id | crawlers_hourly |
duration | None |
end_date | None |
execution_date | 2025-06-11T07:08:32.181597+00:00 |
executor_config | {} |
generate_command | <function TaskInstance.generate_command at 0x7f72d9debb70> |
hostname | |
is_premature | False |
job_id | None |
key | ('crawlers_hourly', 'linkedin-crawl', <Pendulum [2025-06-11T07:08:32.181597+00:00]>, 1) |
log | <Logger airflow.task (INFO)> |
log_filepath | /usr/local/airflow/logs/crawlers_hourly/linkedin-crawl/2025-06-11T07:08:32.181597+00:00.log |
log_url | http://localhost:8080/admin/airflow/log?dag_id=crawlers_hourly&task_id=linkedin-crawl&execution_date=2025-06-11T07%3A08%3A32.181597%2B00%3A00 |
logger | <Logger airflow.task (INFO)> |
mark_success_url | http://localhost:8080/admin/airflow/success?task_id=linkedin-crawl&dag_id=crawlers_hourly&execution_date=2025-06-11T07%3A08%3A32.181597%2B00%3A00&upstream=false&downstream=false |
max_tries | 2 |
metadata | MetaData(bind=None) |
next_try_number | 1 |
operator | None |
pid | None |
pool | general |
previous_ti | <TaskInstance: crawlers_hourly.linkedin-crawl 2025-06-10 05:15:03.328038+00:00 [None]> |
priority_weight | 3 |
queue | default |
queued_dttm | None |
raw | False |
run_as_user | None |
start_date | None |
state | None |
task | <Task(DockerOperator): linkedin-crawl> |
task_id | linkedin-crawl |
test_mode | False |
try_number | 1 |
unixname | airflow |
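These task instance attributes confirm that this run has never started: `state`, `start_date` and `pid` are all None and `try_number` is still 1, while `previous_ti` (the 2025-06-10 run) is also in state [None], which is exactly what the Previous Dagrun State check is waiting on. To confirm that outside the UI, a small query against the metadata database works; again a sketch assuming the 1.10-era models:

```python
# Sketch only: list the most recent task instances of 'linkedin-crawl' and their
# states, to see which earlier run is blocking depends_on_past (Airflow 1.10.x assumed).
from airflow.models import TaskInstance
from airflow.utils.db import provide_session


@provide_session
def recent_states(dag_id, task_id, limit=5, session=None):
    tis = (
        session.query(TaskInstance)
        .filter(TaskInstance.dag_id == dag_id, TaskInstance.task_id == task_id)
        .order_by(TaskInstance.execution_date.desc())
        .limit(limit)
        .all()
    )
    for ti in tis:
        print(ti.execution_date, ti.state)


recent_states("crawlers_hourly", "linkedin-crawl")
```

The second attribute table below describes the task definition itself rather than this particular run.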
Attribute | Value |
---|---|
adhoc | False |
api_version | None |
cli | None |
command | python -m scrapy crawl linkedin |
container | None |
cpus | 1.0 |
dag | <DAG: crawlers_hourly> |
dag_id | crawlers_hourly |
depends_on_past | True |
deps | {<TIDep(Previous Dagrun State)>, <TIDep(Not In Retry Period)>, <TIDep(Trigger Rule)>} |
dns | None |
dns_search | None |
docker_conn_id | None |
docker_url | unix://var/run/docker.sock |
downstream_list | [<Task(DummyOperator): Wait>] |
downstream_task_ids | {'Wait'} |
email | ['airflow@airflow.com']
email_on_failure | False |
email_on_retry | False |
end_date | None |
environment | {} |
execution_timeout | None |
executor_config | {} |
force_pull | False |
image | oc:airflow |
inlets | [] |
lineage_data | None |
log | <Logger airflow.task.operators (INFO)> |
logger | <Logger airflow.task.operators (INFO)> |
max_retry_delay | None |
mem_limit | None |
network_mode | host |
on_failure_callback | None |
on_retry_callback | None |
on_success_callback | None |
outlets | [] |
owner | airflow |
params | {} |
pool | general |
priority_weight | 1 |
priority_weight_total | 3 |
queue | default |
resources | {'cpus': {'_name': 'CPU', '_units_str': 'core(s)', '_qty': 1}, 'ram': {'_name': 'RAM', '_units_str': 'MB', '_qty': 512}, 'disk': {'_name': 'Disk', '_units_str': 'MB', '_qty': 512}, 'gpus': {'_name': 'GPU', '_units_str': 'gpu(s)', '_qty': 0}} |
retries | 2 |
retry_delay | 0:30:00 |
retry_exponential_backoff | False |
run_as_user | None |
schedule_interval | 0 * * * * |
shm_size | None |
sla | None |
start_date | 2020-12-17T00:00:00+00:00 |
task_concurrency | None |
task_id | linkedin-crawl |
task_type | DockerOperator |
template_ext | ('.sh', '.bash') |
template_fields | ('command', 'environment') |
tls_ca_cert | None |
tls_client_cert | None |
tls_client_key | None |
tls_hostname | None |
tls_ssl_version | None |
tmp_dir | /tmp/airflow |
trigger_rule | all_success |
ui_color | #fff |
ui_fgcolor | #000 |
upstream_list | [<Task(DummyOperator): Start>] |
upstream_task_ids | {'Start'} |
user | None |
volumes | [] |
wait_for_downstream | False |
weight_rule | downstream |
working_dir | None |
xcom_all | False |
xcom_push_flag | False |
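The task-level attributes amount to a fairly ordinary DockerOperator wired between two DummyOperators. For reference, the definition below is a reconstruction inferred from the values in the table (Airflow 1.10-style imports assumed; the real DAG file may differ in details that are not visible here, such as whether settings live in `default_args` or directly on the operator):

```python
# Sketch only: a DAG/task definition consistent with the attribute tables above.
from datetime import datetime, timedelta

from airflow import DAG
from airflow.operators.docker_operator import DockerOperator
from airflow.operators.dummy_operator import DummyOperator

default_args = {
    "owner": "airflow",
    "depends_on_past": True,          # the setting behind the Previous Dagrun State check
    "email": ["airflow@airflow.com"],
    "email_on_failure": False,
    "email_on_retry": False,
    "retries": 2,
    "retry_delay": timedelta(minutes=30),
    "pool": "general",
}

with DAG(
    dag_id="crawlers_hourly",
    start_date=datetime(2020, 12, 17),
    schedule_interval="0 * * * *",    # hourly, on the hour
    default_args=default_args,
) as dag:
    start = DummyOperator(task_id="Start")
    wait = DummyOperator(task_id="Wait")

    linkedin_crawl = DockerOperator(
        task_id="linkedin-crawl",
        image="oc:airflow",
        command="python -m scrapy crawl linkedin",
        docker_url="unix://var/run/docker.sock",
        network_mode="host",
        tmp_dir="/tmp/airflow",
        trigger_rule="all_success",   # the default; shown explicitly to match the table
    )

    start >> linkedin_crawl >> wait
```

Note that `trigger_rule`, `queue`, `weight_rule` and most of the TLS/DNS fields in the table are just Airflow defaults; the settings that actually shape this task's behaviour are `depends_on_past`, the retry configuration, the `general` pool, and the Docker image/command pair.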