| Attribute |
Value |
| dag_id |
crawlers |
| duration |
None |
| end_date |
None |
| execution_date |
2025-07-27T08:47:12.795487+00:00 |
| executor_config |
{} |
| generate_command |
<function TaskInstance.generate_command at 0x7fc36a440b70> |
| hostname |
|
| is_premature |
False |
| job_id |
None |
| key |
('crawlers', 'glassdoor-crawl', <Pendulum [2025-07-27T08:47:12.795487+00:00]>, 1) |
| log |
<Logger airflow.task (INFO)> |
| log_filepath |
/usr/local/airflow/logs/crawlers/glassdoor-crawl/2025-07-27T08:47:12.795487+00:00.log |
| log_url |
http://localhost:8080/admin/airflow/log?dag_id=crawlers&task_id=glassdoor-crawl&execution_date=2025-07-27T08%3A47%3A12.795487%2B00%3A00 |
| logger |
<Logger airflow.task (INFO)> |
| mark_success_url |
http://localhost:8080/admin/airflow/success?task_id=glassdoor-crawl&dag_id=crawlers&execution_date=2025-07-27T08%3A47%3A12.795487%2B00%3A00&upstream=false&downstream=false |
| max_tries |
2 |
| metadata |
MetaData(bind=None) |
| next_try_number |
1 |
| operator |
None |
| pid |
None |
| pool |
general |
| previous_ti |
<TaskInstance: crawlers.glassdoor-crawl 2025-06-12 06:19:45.088240+00:00 [None]> |
| priority_weight |
3 |
| queue |
default |
| queued_dttm |
None |
| raw |
False |
| run_as_user |
None |
| start_date |
None |
| state |
None |
| task |
<Task(DockerOperator): glassdoor-crawl> |
| task_id |
glassdoor-crawl |
| test_mode |
False |
| try_number |
1 |
| unixname |
airflow |
| Attribute |
Value |
| adhoc |
False |
| api_version |
None |
| cli |
None |
| command |
python -m scrapy crawl glassdoor |
| container |
None |
| cpus |
1.0 |
| dag |
<DAG: crawlers> |
| dag_id |
crawlers |
| depends_on_past |
True |
| deps |
{<TIDep(Previous Dagrun State)>, <TIDep(Trigger Rule)>, <TIDep(Not In Retry Period)>} |
| dns |
None |
| dns_search |
None |
| docker_conn_id |
None |
| docker_url |
unix://var/run/docker.sock |
| downstream_list |
[<Task(DummyOperator): Wait>] |
| downstream_task_ids |
{'Wait'} |
| email |
['airflow@airflow.com'] |
| email_on_failure |
False |
| email_on_retry |
False |
| end_date |
None |
| environment |
{} |
| execution_timeout |
None |
| executor_config |
{} |
| force_pull |
False |
| image |
oc:airflow |
| inlets |
[] |
| lineage_data |
None |
| log |
<Logger airflow.task.operators (INFO)> |
| logger |
<Logger airflow.task.operators (INFO)> |
| max_retry_delay |
None |
| mem_limit |
None |
| network_mode |
host |
| on_failure_callback |
None |
| on_retry_callback |
None |
| on_success_callback |
None |
| outlets |
[] |
| owner |
airflow |
| params |
{} |
| pool |
general |
| priority_weight |
1 |
| priority_weight_total |
3 |
| queue |
default |
| resources |
{'cpus': {'_name': 'CPU', '_units_str': 'core(s)', '_qty': 1}, 'ram': {'_name': 'RAM', '_units_str': 'MB', '_qty': 512}, 'disk': {'_name': 'Disk', '_units_str': 'MB', '_qty': 512}, 'gpus': {'_name': 'GPU', '_units_str': 'gpu(s)', '_qty': 0}} |
| retries |
2 |
| retry_delay |
0:30:00 |
| retry_exponential_backoff |
False |
| run_as_user |
None |
| schedule_interval |
0 0 * * * |
| shm_size |
None |
| sla |
None |
| start_date |
2020-12-17T00:00:00+00:00 |
| task_concurrency |
None |
| task_id |
glassdoor-crawl |
| task_type |
DockerOperator |
| template_ext |
('.sh', '.bash') |
| template_fields |
('command', 'environment') |
| tls_ca_cert |
None |
| tls_client_cert |
None |
| tls_client_key |
None |
| tls_hostname |
None |
| tls_ssl_version |
None |
| tmp_dir |
/tmp/airflow |
| trigger_rule |
all_success |
| ui_color |
#fff |
| ui_fgcolor |
#000 |
| upstream_list |
[<Task(DummyOperator): Start>] |
| upstream_task_ids |
{'Start'} |
| user |
None |
| volumes |
[] |
| wait_for_downstream |
False |
| weight_rule |
downstream |
| working_dir |
None |
| xcom_all |
False |
| xcom_push_flag |
False |