Attribute |
Value |
dag_id |
crawlers_hourly |
duration |
None |
end_date |
2025-04-21 02:06:54.346960+00:00 |
execution_date |
2025-04-21T01:00:00+00:00 |
executor_config |
{} |
generate_command |
<function TaskInstance.generate_command at 0x7f2ad4931b70> |
hostname |
68bb9f6dae8e |
is_premature |
False |
job_id |
None |
key |
('crawlers_hourly', 'linkedin-crawl', <Pendulum [2025-04-21T01:00:00+00:00]>, 2) |
log |
<Logger airflow.task (INFO)> |
log_filepath |
/usr/local/airflow/logs/crawlers_hourly/linkedin-crawl/2025-04-21T01:00:00+00:00.log |
log_url |
http://localhost:8080/admin/airflow/log?dag_id=crawlers_hourly&task_id=linkedin-crawl&execution_date=2025-04-21T01%3A00%3A00%2B00%3A00 |
logger |
<Logger airflow.task (INFO)> |
mark_success_url |
http://localhost:8080/admin/airflow/success?task_id=linkedin-crawl&dag_id=crawlers_hourly&execution_date=2025-04-21T01%3A00%3A00%2B00%3A00&upstream=false&downstream=false |
max_tries |
2 |
metadata |
MetaData(bind=None) |
next_try_number |
2 |
operator |
None |
pid |
800253 |
pool |
general |
previous_ti |
<TaskInstance: crawlers_hourly.linkedin-crawl 2025-04-21 00:00:00+00:00 [success]> |
priority_weight |
3 |
queue |
default |
queued_dttm |
None |
raw |
False |
run_as_user |
None |
start_date |
2025-04-21 02:00:05.981833+00:00 |
state |
success |
task |
<Task(DockerOperator): linkedin-crawl> |
task_id |
linkedin-crawl |
test_mode |
False |
try_number |
2 |
unixname |
airflow |
Attribute |
Value |
adhoc |
False |
api_version |
None |
cli |
None |
command |
python -m scrapy crawl linkedin |
container |
None |
cpus |
1.0 |
dag |
<DAG: crawlers_hourly> |
dag_id |
crawlers_hourly |
depends_on_past |
True |
deps |
{<TIDep(Trigger Rule)>, <TIDep(Not In Retry Period)>, <TIDep(Previous Dagrun State)>} |
dns |
None |
dns_search |
None |
docker_conn_id |
None |
docker_url |
unix://var/run/docker.sock |
downstream_list |
[<Task(DummyOperator): Wait>] |
downstream_task_ids |
{'Wait'} |
email |
['airflow@airflow.com'] |
email_on_failure |
False |
email_on_retry |
False |
end_date |
None |
environment |
{} |
execution_timeout |
None |
executor_config |
{} |
force_pull |
False |
image |
oc:airflow |
inlets |
[] |
lineage_data |
None |
log |
<Logger airflow.task.operators (INFO)> |
logger |
<Logger airflow.task.operators (INFO)> |
max_retry_delay |
None |
mem_limit |
None |
network_mode |
host |
on_failure_callback |
None |
on_retry_callback |
None |
on_success_callback |
None |
outlets |
[] |
owner |
airflow |
params |
{} |
pool |
general |
priority_weight |
1 |
priority_weight_total |
3 |
queue |
default |
resources |
{'cpus': {'_name': 'CPU', '_units_str': 'core(s)', '_qty': 1}, 'ram': {'_name': 'RAM', '_units_str': 'MB', '_qty': 512}, 'disk': {'_name': 'Disk', '_units_str': 'MB', '_qty': 512}, 'gpus': {'_name': 'GPU', '_units_str': 'gpu(s)', '_qty': 0}} |
retries |
2 |
retry_delay |
0:30:00 |
retry_exponential_backoff |
False |
run_as_user |
None |
schedule_interval |
0 * * * * |
shm_size |
None |
sla |
None |
start_date |
2020-12-17T00:00:00+00:00 |
task_concurrency |
None |
task_id |
linkedin-crawl |
task_type |
DockerOperator |
template_ext |
('.sh', '.bash') |
template_fields |
('command', 'environment') |
tls_ca_cert |
None |
tls_client_cert |
None |
tls_client_key |
None |
tls_hostname |
None |
tls_ssl_version |
None |
tmp_dir |
/tmp/airflow |
trigger_rule |
all_success |
ui_color |
#fff |
ui_fgcolor |
#000 |
upstream_list |
[<Task(DummyOperator): Start>] |
upstream_task_ids |
{'Start'} |
user |
None |
volumes |
[] |
wait_for_downstream |
False |
weight_rule |
downstream |
working_dir |
None |
xcom_all |
False |
xcom_push_flag |
False |