Below is an example of a query to find the number of jobs delayed
within certain delay categories within given time buckets. For
example, it finds the jobs in flight during a given hour (e.g., 9 AM to
10 AM). It computes the amount of delay experienced by each job within
that hour. And it counts the number of jobs experiencing delay within
certain delay categories. So, you can say that
N30-60
jobs were delayed
between 30 to 60 minutes,
N15-30
jobs were delayed
between 15 and 30 minutes, etc.
The script variables can be modified as needed. The common table expression jobs_selected can be modified as needed. Choose an appropriate source view: JOBS_BY_USER, JOBS_BY_PROJECT, JOBS_BY_ORGANIZATION, or an archive of one of these. Modify the filters as needed. If you have access to a project_reservation_mapping table, you can uncomment and comment the appropriate lines in the query below.
declare selected_date date default current_date; declare selected_days int64 default 1; declare selected_bucket_minutes int64 default 60; with jobs_selected as ( select j.project_id ,j.job_id ,j.reservation_id ,j.creation_time ,j.start_time ,j.end_time ,j.job_stages -- from `PROJECT_ID.region-us.INFORMATION_SCHEMA.JOBS_BY_ORGANIZATION` j -- from `PROJECT_ID.region-us.INFORMATION_SCHEMA.JOBS_BY_PROJECT` j -- from `PROJECT_ID.region-us.INFORMATION_SCHEMA.JOBS_BY_USER` j -- default to this query's billing project from `region-us.INFORMATION_SCHEMA.JOBS_BY_USER` j where j.job_type = 'QUERY' -- LOAD, QUERY, COPY, or EXTRACT -- JOBS_BY_ tables partitioned on creation_time by day; -- creation_time is UTC and current_date defaults to UTC and date(j.creation_time) between /* need to include day before to capture all jobs in timeframe */ date_add( selected_date ,interval -1 day ) and date_add( selected_date ,interval selected_days - 1 day ) ) ,jobs_intervals as ( select j.project_id ,j.job_id ,j.reservation_id ,j.creation_time ,j.start_time ,j.end_time ,case when array_length(j.job_stages) > 0 then timestamp_millis( ( select min(js.start_ms) from unnest(j.job_stages) js ) ) else null end as min_stage_start_time ,case when array_length(j.job_stages) > 0 then timestamp_millis( ( select max(js.end_ms) from unnest(j.job_stages) js ) ) else null end as max_stage_end_time ,case when array_length(j.job_stages) > 0 then ( select array_agg( struct( js.start_time ,js.end_time ) order by js.start_time ) from ( /* merge intervals logic from */ /* https://stackoverflow.com/questions/57173813/merge-overlapping-intervals-with-containing-null-values */ select min(js.start_time) as start_time ,nullif( max(js.end_time) ,j.end_time ) as end_time from ( select js.* ,countif( js.prev_max_end_time is null or js.prev_max_end_time < start_time ) over ( order by js.start_time ) as group_nbr from ( select js.* except (end_time) ,coalesce( js.end_time ,j.end_time ) as end_time ,max( coalesce( js.end_time ,j.end_time ) ) over ( order by js.start_time rows between unbounded preceding and 1 preceding ) as prev_max_end_time from ( select timestamp_millis( js.start_ms ) as start_time ,timestamp_millis( js.end_ms ) as end_time from unnest(j.job_stages) js ) js ) js ) js group by js.group_nbr ) js ) else null end as stage_intervals from jobs_selected j ) ,time_buckets as ( select start_time ,timestamp_add( start_time ,interval selected_bucket_minutes minute ) as end_time from unnest( generate_timestamp_array( timestamp(selected_date) ,timestamp( date_add( selected_date ,interval selected_days day ) ) ,interval selected_bucket_minutes minute ) ) start_time where start_time < timestamp( date_add( selected_date ,interval selected_days day ) ) ) /* note that elapsed times calculated here may go negative, */ /* will need to only include those elapsed times that are positive */ ,jobs_intervals_buckets as ( select tb.start_time as bucket_start_time ,j.project_id ,j.creation_time ,j.reservation_id ,timestamp_diff( least( j.start_time ,tb.end_time ) ,greatest( j.creation_time ,tb.start_time ) ,millisecond ) as job_start_delay_ms ,timestamp_diff( least( j.min_stage_start_time ,tb.end_time ) ,greatest( j.start_time ,tb.start_time ) ,millisecond ) as first_stage_delay_ms ,timestamp_diff( least( j.max_stage_end_time ,tb.end_time ) ,greatest( j.min_stage_start_time ,tb.start_time ) ,millisecond ) as stages_elapsed_ms ,( select sum( /* handling negatives here because I have to */ case when least( si.end_time ,tb.end_time ) > greatest( si.start_time ,tb.start_time ) then timestamp_diff( least( si.end_time ,tb.end_time ) ,greatest( si.start_time ,tb.start_time ) ,millisecond ) else 0 end ) from unnest(j.stage_intervals) si ) as stages_active_elapsed_ms ,timestamp_diff( least( j.end_time ,tb.end_time ) ,greatest( j.max_stage_end_time ,tb.start_time ) ,millisecond ) as job_end_delay_ms from jobs_intervals j join time_buckets tb on tb.start_time < j.end_time and tb.end_time > j.creation_time ) /* handling negative values here for most of the ms values */ ,jobs_ms_buckets as ( select j.bucket_start_time ,j.project_id ,j.creation_time ,j.reservation_id ,case when not (j.job_start_delay_ms > 0) then 0 else j.job_start_delay_ms end as job_start_delay_ms ,case when not (j.first_stage_delay_ms > 0) then 0 else j.first_stage_delay_ms end as first_stage_delay_ms ,case when not (j.stages_elapsed_ms > 0) then 0 else j.stages_elapsed_ms end as stages_elapsed_ms ,stages_active_elapsed_ms ,case when not (j.job_end_delay_ms > 0) then 0 else j.job_end_delay_ms end as job_end_delay_ms from jobs_intervals_buckets j ) ,jobs_delay_buckets as ( select j.bucket_start_time ,j.project_id ,j.creation_time ,j.reservation_id ,j.job_start_delay_ms + j.first_stage_delay_ms + ( j.stages_elapsed_ms - j.stages_active_elapsed_ms ) + j.job_end_delay_ms as delay_ms from jobs_ms_buckets j ) -- ,project_reservation_mapping as -- ( -- select -- prm.project_id -- ,nullif( -- prm.reservation -- ,'<deleted>' -- ) as reservation -- ,prm.timestamp as start_time -- ,coalesce( -- timestamp_add( -- lead(prm.timestamp) over ( -- partition by -- prm.project_id -- order by -- timestamp -- ) -- ,interval -1 microsecond -- ) -- ,cast('9999-12-31 23:59:59.999999' as timestamp) -- ) as end_time -- from `RESERVATIONS_PROJECT_ID.RESERVATIONS_DATASET.project_reservation_mapping` prm -- ) ,jobs_delay_categories_buckets as ( select j.bucket_start_time ,j.project_id /* clean up reservation before grouping to ensure no duplicates */ /* remove admin project and region qualifiers (e.g., ADMIN_PROJECT_ID:US.) */ ,regexp_replace( coalesce( /* Google adds 'pac-' prefix in tree hierarchy; remove it */ regexp_replace( /* Google inconsistent about including 'root/' in flat hierarchy; remove it */ regexp_replace( j.reservation_id ,r'^([^:]*):([^.]*)\.root/' ,r'\1:\2.' ) ,r'^pac-' ,'' ) ,'<unknown>' ) ,r'^[^:]+:[^.]+\.' ,'' ) as reservation -- /* remove admin project and region qualifiers (e.g., ADMIN_PROJECT_ID:US.) */ -- ,regexp_replace( -- coalesce( -- /* Google adds 'pac-' prefix in tree hierarchy; remove it */ -- regexp_replace( -- /* Google inconsistent about including 'root/' in flat hierarchy; remove it */ -- regexp_replace( -- j.reservation_id -- ,r'^([^:]*):([^.]*)\.root/' -- ,r'\1:\2.' -- ) -- ,r'^pac-' -- ,'' -- ) -- ,prm.reservation -- ,'default' -- ) -- ,r'^[^:]+:[^.]+\.' -- ,'' -- ) as reservation ,case when coalesce(j.delay_ms, 0) < 1 * 1000 then '1. [0s-1s)' when j.delay_ms < 10 * 1000 then '2. [1s-10s)' when j.delay_ms < 1 * 60 * 1000 then '3. [10s-1m)' when j.delay_ms < 5 * 60 * 1000 then '4. [1m-5m)' when j.delay_ms < 15 * 60 * 1000 then '5. [5m-15m)' when j.delay_ms < 30 * 60 * 1000 then '6. [15m-30m)' else '7. [30m-60m]' end as delay_category ,j.delay_ms from jobs_delay_buckets j -- left join project_reservation_mapping prm -- on -- prm.project_id = j.project_id -- and j.creation_time between prm.start_time and prm.end_time ) select j.bucket_start_time ,j.project_id ,j.reservation ,j.delay_category ,coalesce( sum(j.delay_ms) / 1000 ,0 ) as delay_seconds ,coalesce( max(j.delay_ms) / 1000 ,0 ) as max_delay_seconds ,count(*) as job_count from jobs_delay_categories_buckets j group by j.bucket_start_time ,j.project_id ,j.reservation ,j.delay_category order by j.bucket_start_time ,j.project_id ,j.reservation ,j.delay_category ;