6.1. Primary Key Test

Below is a template for a query to test a set of fields to see if they form a primary key (there is one and only one row per key value).

select
    x.row_cnt
    ,x.key_cnt
    ,format(
        '%5.4f'
        ,cast(x.key_cnt as float64)
            / sum(x.key_cnt) over ()
    ) as key_cnt_pct
    ,format(
        '%5.4f'
        ,cast(x.row_cnt * x.key_cnt as float64)
            / sum(x.row_cnt * x.key_cnt) over ()
    ) as row_cnt_pct
from (
    select
        x.row_cnt
        ,count(*) as key_cnt
    from (
        select
            x.column1
            ,x.column2
            ,count(*) as row_cnt
        from `projectId.datasetId.tableId` x
        group by
            x.column1
            ,x.column2

    ) x
    group by
        x.row_cnt
) x
order by
    x.row_cnt
;

Below is an example of the test that shows that (word) is not a primary key.

select
    x.row_cnt
    ,x.key_cnt
    ,format(
        '%5.4f'
        ,cast(x.key_cnt as float64)
            / sum(x.key_cnt) over ()
    ) as key_cnt_pct
    ,format(
        '%5.4f'
        ,cast(x.row_cnt * x.key_cnt as float64)
            / sum(x.row_cnt * x.key_cnt) over ()
    ) as row_cnt_pct
from (
    select
        x.row_cnt
        ,count(*) as key_cnt
    from (
        select
            x.word
            ,count(*) as row_cnt
        from `bigquery-public-data.samples.shakespeare` x
        group by
            x.word
    ) x
    group by
        x.row_cnt
) x
order by
    x.row_cnt
;
row_cntkey_cntkey_cnt_pctrow_cnt_pct
115,5260.47360.0943
24,7750.14560.0580
32,4510.07480.0447
41,6100.04910.0391
51,2300.03750.0374
............
38750.00230.0173
39730.00220.0173
40920.00280.0223
411260.00380.0314
422130.00650.0543

Below is an example of the test that shows that (word, corpus) is a primary key.

select
    x.row_cnt
    ,x.key_cnt
    ,format(
        '%5.4f'
        ,cast(x.key_cnt as float64)
            / sum(x.key_cnt) over ()
    ) as key_cnt_pct
    ,format(
        '%5.4f'
        ,cast(x.row_cnt * x.key_cnt as float64)
            / sum(x.row_cnt * x.key_cnt) over ()
    ) as row_cnt_pct
from (
    select
        x.row_cnt
        ,count(*) as key_cnt
    from (
        select
            x.word
            ,x.corpus
            ,count(*) as row_cnt
        from `bigquery-public-data.samples.shakespeare` x
        group by
            x.word
            ,x.corpus
    ) x
    group by
        x.row_cnt
) x
order by
    x.row_cnt
;
row_cntkey_cntkey_cnt_pctrow_cnt_pct
1164,6561.00001.0000