Below is a template for a query to test a set of fields to see if they form a primary key (there is one and only one row per key value).
select
x.row_cnt
,x.key_cnt
,format(
'%5.4f'
,cast(x.key_cnt as float64)
/ sum(x.key_cnt) over ()
) as key_cnt_pct
,format(
'%5.4f'
,cast(x.row_cnt * x.key_cnt as float64)
/ sum(x.row_cnt * x.key_cnt) over ()
) as row_cnt_pct
from (
select
x.row_cnt
,count(*) as key_cnt
from (
select
x.column1
,x.column2
,count(*) as row_cnt
from `projectId.datasetId.tableId` x
group by
x.column1
,x.column2
) x
group by
x.row_cnt
) x
order by
x.row_cnt
;Below is an example of the test that shows that (word) is not a primary key.
select
x.row_cnt
,x.key_cnt
,format(
'%5.4f'
,cast(x.key_cnt as float64)
/ sum(x.key_cnt) over ()
) as key_cnt_pct
,format(
'%5.4f'
,cast(x.row_cnt * x.key_cnt as float64)
/ sum(x.row_cnt * x.key_cnt) over ()
) as row_cnt_pct
from (
select
x.row_cnt
,count(*) as key_cnt
from (
select
x.word
,count(*) as row_cnt
from `bigquery-public-data.samples.shakespeare` x
group by
x.word
) x
group by
x.row_cnt
) x
order by
x.row_cnt
;| row_cnt | key_cnt | key_cnt_pct | row_cnt_pct |
|---|---|---|---|
| 1 | 15,526 | 0.4736 | 0.0943 |
| 2 | 4,775 | 0.1456 | 0.0580 |
| 3 | 2,451 | 0.0748 | 0.0447 |
| 4 | 1,610 | 0.0491 | 0.0391 |
| 5 | 1,230 | 0.0375 | 0.0374 |
| ... | ... | ... | ... |
| 38 | 75 | 0.0023 | 0.0173 |
| 39 | 73 | 0.0022 | 0.0173 |
| 40 | 92 | 0.0028 | 0.0223 |
| 41 | 126 | 0.0038 | 0.0314 |
| 42 | 213 | 0.0065 | 0.0543 |
Below is an example of the test that shows that (word, corpus) is a primary key.
select
x.row_cnt
,x.key_cnt
,format(
'%5.4f'
,cast(x.key_cnt as float64)
/ sum(x.key_cnt) over ()
) as key_cnt_pct
,format(
'%5.4f'
,cast(x.row_cnt * x.key_cnt as float64)
/ sum(x.row_cnt * x.key_cnt) over ()
) as row_cnt_pct
from (
select
x.row_cnt
,count(*) as key_cnt
from (
select
x.word
,x.corpus
,count(*) as row_cnt
from `bigquery-public-data.samples.shakespeare` x
group by
x.word
,x.corpus
) x
group by
x.row_cnt
) x
order by
x.row_cnt
;| row_cnt | key_cnt | key_cnt_pct | row_cnt_pct |
|---|---|---|---|
| 1 | 164,656 | 1.0000 | 1.0000 |