Interview Ready Questions on Group By and Window Functions in SQL: Exploring the World Population Dataset.
1. Introduction -
Database: MS SQL SERVER
We have data from 20 countries on which we are performing the Grouping and Advance SQL functions like Window Functions.
For grouping follow - FWGH (From - Where - Group BY - Having) Sequence.
2. Create & Insert Statement -
Creating Database
create database Country;
use Country;
Creating Table
-- Create table
CREATE TABLE Country (
id INT,
code VARCHAR(2),
name VARCHAR(100),
area INT,
area_land INT,
area_water INT,
population BIGINT,
population_growth DECIMAL(4, 2),
birth_rate DECIMAL(5, 2),
death_rate DECIMAL(5, 2),
migration_rate DECIMAL(5, 2),
continent VARCHAR(50)
);
Insert Statement
INSERT INTO Country (id, code, name, area, area_land, area_water, population, population_growth, birth_rate, death_rate, migration_rate, continent)
VALUES
(1, 'af', 'Afghanistan', 652230, 652230, 0, 32564342, 2.32, 38.57, 13.89, 1.51, 'Asia'),
(2, 'al', 'Albania', 28748, 27398, 1350, 3029278, 0.3, 12.92, 6.58, 3.3, 'Europe'),
(3, 'ag', 'Algeria', 2381741, 2381741, 0, 39542166, 1.84, 23.67, 4.31, 0.92, 'Africa'),
(4, 'an', 'Andorra', 468, 468, 0, 85580, 0.12, 8.13, 6.96, 0.0, 'Europe'),
(5, 'ao', 'Angola', 1246700, 1246700, 0, 19625353, 2.78, 38.78, 11.49, 0.46, 'Africa'),
(6, 'aq', 'Antarctica', 14000000, 280000, 13720000, 1106, NULL, NULL, NULL, NULL, 'Antarctica'),
(7, 'ar', 'Argentina', 2780400, 2736690, 43710, 43431886, 0.93, 16.64, 7.33, 0.0, 'South America'),
(8, 'am', 'Armenia', 29800, 28203, 1597, 3056382, 0.2, 13.61, 9.34, 0.0, 'Asia'),
(9, 'aw', 'Aruba', 180, 180, 0, 112162, 1.22, 12.67, 8.68, 8.09, 'North America'),
(10, 'au', 'Australia', 7692024, 7682300, 9724, 22751014, 1.05, 12.15, 7.14, 5.65, 'Oceania'),
(11, 'at', 'Austria', 83858, 82445, 1413, 8665550, 0.55, 9.41, 9.42, 5.56, 'Europe'),
(12, 'az', 'Azerbaijan', 86600, 82629, 3971, 9780780, 0.96, 16.64, 7.07, 0.0, 'Asia'),
(13, 'bs', 'Bahamas', 13880, 10010, 3870, 324597, 0.95, 15.5, 7.29, 2.01, 'North America'),
(14, 'bh', 'Bahrain', 760, 760, 0, 1346613, 4.51, 15.43, 2.71, 13.31, 'Asia'),
(15, 'bd', 'Bangladesh', 147570, 130170, 17400, 156186882, 1.6, 19.68, 5.96, 0.0, 'Asia'),
(16, 'bb', 'Barbados', 430, 430, 0, 290604, 0.31, 11.87, 8.69, 0.0, 'North America'),
(17, 'by', 'Belarus', 207600, 202900, 4700, 9589689, 0.2, 10.7, 13.36, 0.7, 'Europe'),
(18, 'be', 'Belgium', 30528, 30280, 248, 11323973, 0.66, 11.98, 9.54, 2.91, 'Europe'),
(19, 'bz', 'Belize', 22966, 22806, 160, 347369, 1.97, 24.86, 5.29, 0.0, 'North America'),
(20, 'bj', 'Benin', 112622, 110622, 2000, 10448647, 2.78, 38.85, 8.21, 0.0, 'Africa');
3. Table Preview
Query -
select * from Country2;
Output -
id | code | name | area | area_land | area_water | population | population_growth | birth_rate | death_rate | migration_rate | continent |
1 | af | Afghanistan | 652230 | 652230 | 0 | 32564342 | 2.32 | 38.57 | 13.89 | 1.51 | Asia |
2 | al | Albania | 28748 | 27398 | 1350 | 3029278 | 0.30 | 12.92 | 6.58 | 3.30 | Europe |
3 | ag | Algeria | 2381741 | 2381741 | 0 | 39542166 | 1.84 | 23.67 | 4.31 | 0.92 | Africa |
4 | an | Andorra | 468 | 468 | 0 | 85580 | 0.12 | 8.13 | 6.96 | 0.00 | Europe |
5 | ao | Angola | 1246700 | 1246700 | 0 | 19625353 | 2.78 | 38.78 | 11.49 | 0.46 | Africa |
6 | aq | Antarctica | 14000000 | 280000 | 13720000 | 1106 | NULL | NULL | NULL | NULL | Antarctica |
7 | ar | Argentina | 2780400 | 2736690 | 43710 | 43431886 | 0.93 | 16.64 | 7.33 | 0.00 | South America |
8 | am | Armenia | 29800 | 28203 | 1597 | 3056382 | 0.20 | 13.61 | 9.34 | 0.00 | Asia |
9 | aw | Aruba | 180 | 180 | 0 | 112162 | 1.22 | 12.67 | 8.68 | 8.09 | North America |
10 | au | Australia | 7692024 | 7682300 | 9724 | 22751014 | 1.05 | 12.15 | 7.14 | 5.65 | Oceania |
11 | at | Austria | 83858 | 82445 | 1413 | 8665550 | 0.55 | 9.41 | 9.42 | 5.56 | Europe |
12 | az | Azerbaijan | 86600 | 82629 | 3971 | 9780780 | 0.96 | 16.64 | 7.07 | 0.00 | Asia |
13 | bs | Bahamas | 13880 | 10010 | 3870 | 324597 | 0.95 | 15.50 | 7.29 | 2.01 | North America |
14 | bh | Bahrain | 760 | 760 | 0 | 1346613 | 4.51 | 15.43 | 2.71 | 13.31 | Asia |
15 | bd | Bangladesh | 147570 | 130170 | 17400 | 156186882 | 1.60 | 19.68 | 5.96 | 0.00 | Asia |
16 | bb | Barbados | 430 | 430 | 0 | 290604 | 0.31 | 11.87 | 8.69 | 0.00 | North America |
17 | by | Belarus | 207600 | 202900 | 4700 | 9589689 | 0.20 | 10.70 | 13.36 | 0.70 | Europe |
18 | be | Belgium | 30528 | 30280 | 248 | 11323973 | 0.66 | 11.98 | 9.54 | 2.91 | Europe |
19 | bz | Belize | 22966 | 22806 | 160 | 347369 | 1.97 | 24.86 | 5.29 | 0.00 | North America |
20 | bj | Benin | 112622 | 110622 | 2000 | 10448647 | 2.78 | 38.85 | 8.21 | 0.00 | Africa |
4. Questions
Question 1. - --Find the total population of each continent.
- Query:
select sum(population) total_population, continent
from Country2
group by continent;
- Output
total_population | continent |
69616166 | Africa |
1106 | Antarctica |
202934999 | Asia |
32694070 | Europe |
1074732 | North America |
22751014 | Oceania |
43431886 | South America |
Question 2. - Calculate the average area_land for countries in each continent.
- Query:
select avg(area_land) avg_land_area, continent
from country2
group by continent;
- Output
avg_land_area | continent |
1246354 | Africa |
280000 | Antarctica |
178798 | Asia |
68698 | Europe |
8356 | North America |
7682300 | Oceania |
2736690 | South America |
Question 3. - List the continents with 5 or more than 5 countries.
- Query:
select continent, count(name) as country_count
from Country2
group by continent
having count(name) >= 5;
- Output
continent | country_count |
Asia | 5 |
Europe | 5 |
Question 4. - Show the highest population among countries for each continent.
- Query:
select foo.name, foo.population, foo.continent from
(select max(population) over(partition by continent) as max_pop,
population, name, continent
from Country2) foo
where foo.max_pop = foo.population
- Output
name | population | continent |
Algeria | 39542166 | Africa |
Antarctica | 1106 | Antarctica |
Bangladesh | 156186882 | Asia |
Belgium | 11323973 | Europe |
Belize | 347369 | North America |
Australia | 22751014 | Oceania |
Argentina | 43431886 | South America |
Question 6. - Display the continents where the average birth rate is higher than 20.
- Query:
select avg(birth_rate) avg_birth_rate, continent
from Country2
group by continent
having avg(birth_rate) > 20
- Output
avg_birth_rate | continent |
33.766666 | Africa |
20.786000 | Asia |
Question 7. - List the continents with at least two country having a death rate above 10.
- Query:
select continent from Country2
where death_rate > 10
group by continent
having count(name)>=1
- Output
continent |
Africa |
Asia |
Europe |
Question 8. - Show the continents with more than 3 countries and a total population greater than 100 million.
- Query:
select continent,sum(population) sum_pop,count(name) country_count
from Country2
group by continent
having sum(population) > 100000000
and
count(name)>3
- Output
continent | sum_pop | country_count |
Asia | 202934999 | 5 |
Question 9. - Find the average population growth rate for countries in each continent.
- Query:
select avg(population_growth) avg_population, continent
from Country2
group by continent
- Output
avg_population | continent |
2.466666 | Africa |
NULL | Antarctica |
1.918000 | Asia |
0.366000 | Europe |
1.112500 | North America |
1.050000 | Oceania |
0.930000 | South America |
Question 10. - Find the continents where the average migration rate is less than 1.
- Query:
select continent, avg(migration_rate) avg_mig_rate
from Country2
group by continent
having avg(migration_rate)<1
- Output
continent | avg_mig_rate |
Africa | 0.460000 |
South America | 0.000000 |
Question 11. - Query name of countries where birth_rate is less than avg_birth rate of continent.
- Query:
select a.name,a.birth_rate,
a.avg_birthrate,continent
from(
select
avg(birth_rate) over(partition by continent)
avg_birthrate, continent,birth_rate,name
from Country2
) a
where a.birth_rate < a.avg_birthrate
- Output
name | birth_rate | avg_birthrate | continent |
Algeria | 23.67 | 33.766666 | Africa |
Armenia | 13.61 | 20.786000 | Asia |
Azerbaijan | 16.64 | 20.786000 | Asia |
Bahrain | 15.43 | 20.786000 | Asia |
Bangladesh | 19.68 | 20.786000 | Asia |
Austria | 9.41 | 10.628000 | Europe |
Andorra | 8.13 | 10.628000 | Europe |
Aruba | 12.67 | 16.225000 | North America |
Barbados | 11.87 | 16.225000 | North America |
Bahamas | 15.50 | 16.225000 | North America |
Question 12. - Query Countries which have maximum population growth in their continent.
- Query:
select foo.name,
foo.Max_pop_growth,
foo.population_growth,
foo.continent
from
(
select max(population_growth) over(partition by continent) Max_pop_growth,
name,continent,
population_growth
from Country2
) foo
where foo.Max_pop_growth = foo.population_growth
- Output
name | Max_pop_growth | population_growth | continent |
Angola | 2.78 | 2.78 | Africa |
Benin | 2.78 | 2.78 | Africa |
Bahrain | 4.51 | 4.51 | Asia |
Belgium | 0.66 | 0.66 | Europe |
Belize | 1.97 | 1.97 | North America |
Australia | 1.05 | 1.05 | Oceania |
Argentina | 0.93 | 0.93 | South America |
Aruba | 12.67 | 16.225000 | North America |
Barbados | 11.87 | 16.225000 | North America |
Bahamas | 15.50 | 16.225000 | North America |
Question 13. - For each country, calculate the percentage of the total population it represents within its continent.
- Query:
select foo.name,foo.continent,
foo.population,
foo.sum_pop as continent_tot_pop,
(foo.population* 100)/foo.sum_pop as percentage_by_continent
from
(select continent,
name,population,
sum(population) over(partition by continent) sum_pop from Country2) foo;
- Output
name | continent | population | continent_tot_pop | percentage_by_continent |
Algeria | Africa | 39542166 | 69616166 | 56 |
Angola | Africa | 19625353 | 69616166 | 28 |
Benin | Africa | 10448647 | 69616166 | 15 |
Antarctica | Antarctica | 1106 | 1106 | 100 |
Armenia | Asia | 3056382 | 202934999 | 1 |
Afghanistan | Asia | 32564342 | 202934999 | 16 |
Azerbaijan | Asia | 9780780 | 202934999 | 4 |
Bahrain | Asia | 1346613 | 202934999 | 0 |
Bangladesh | Asia | 156186882 | 202934999 | 76 |
Austria | Europe | 8665550 | 32694070 | 26 |
Belarus | Europe | 9589689 | 32694070 | 29 |
Belgium | Europe | 11323973 | 32694070 | 34 |
Albania | Europe | 3029278 | 32694070 | 9 |
Andorra | Europe | 85580 | 32694070 | 0 |
Aruba | North America | 112162 | 1074732 | 10 |
Belize | North America | 347369 | 1074732 | 32 |
Barbados | North America | 290604 | 1074732 | 27 |
Bahamas | North America | 324597 | 1074732 | 30 |
Australia | Oceania | 22751014 | 22751014 | 100 |
Argentina | South America | 43431886 | 43431886 | 100 |
Question 14. - Rank the countries in each continent based on their population.
- Query:
select name,continent,population,
Rank() over(partition by continent order by population) as country_rank
from Country2;
- Output
name | continent | population | country_rank | percentage_by_continent |
Benin | Africa | 10448647 | 1 | 56 |
Angola | Africa | 19625353 | 2 | 28 |
Algeria | Africa | 39542166 | 3 | 15 |
Antarctica | Antarctica | 1106 | 1 | 100 |
Bahrain | Asia | 1346613 | 1 | 1 |
Armenia | Asia | 3056382 | 2 | 16 |
Azerbaijan | Asia | 9780780 | 3 | 4 |
Afghanistan | Asia | 32564342 | 4 | 0 |
Bangladesh | Asia | 156186882 | 5 | 76 |
Andorra | Europe | 85580 | 1 | 26 |
Albania | Europe | 3029278 | 2 | 29 |
Austria | Europe | 8665550 | 3 | 34 |
Belarus | Europe | 9589689 | 4 | 9 |
Belgium | Europe | 11323973 | 5 | 0 |
Aruba | North America | 112162 | 1 | 10 |
Barbados | North America | 290604 | 2 | 32 |
Bahamas | North America | 324597 | 3 | 27 |
Belize | North America | 347369 | 4 | 30 |
Australia | Oceania | 22751014 | 1 | 100 |
Argentina | South America | 43431886 | 1 | 100 |
Question 15. - Calculate the cumulative population for each continent in ascending order based on their population.
- Query:
select continent,population,
sum(population) over(partition by continent order by population) sum_pop
from Country2;
- Output
continent | population | sum_pop |
Africa | 10448647 | 10448647 |
Africa | 19625353 | 30074000 |
Africa | 39542166 | 69616166 |
Antarctica | 1106 | 1106 |
Asia | 1346613 | 1346613 |
Asia | 3056382 | 4402995 |
Asia | 9780780 | 14183775 |
Asia | 32564342 | 46748117 |
Asia | 156186882 | 202934999 |
Europe | 85580 | 85580 |
Europe | 3029278 | 3114858 |
Europe | 8665550 | 11780408 |
Europe | 9589689 | 21370097 |
Europe | 11323973 | 32694070 |
North America | 112162 | 112162 |
North America | 290604 | 402766 |
North America | 324597 | 727363 |
North America | 347369 | 1074732 |
Oceania | 22751014 | 22751014 |
South America | 43431886 | 43431886 |
Question 16. - Lead Lag Questions.
- Query:
select name, area,
lag(area) over(order by name) as previous_area,
lead(area) over(order by name) as next_area
from Country2;
- Output
name | area | previous_area | next_area |
Afghanistan | 652230 | NULL | 28748 |
Albania | 28748 | 652230 | 2381741 |
Algeria | 2381741 | 28748 | 468 |
Andorra | 468 | 2381741 | 1246700 |
Angola | 1246700 | 468 | 14000000 |
Antarctica | 14000000 | 1246700 | 2780400 |
Argentina | 2780400 | 14000000 | 29800 |
Armenia | 29800 | 2780400 | 180 |
Aruba | 180 | 29800 | 7692024 |
Australia | 7692024 | 180 | 83858 |
Austria | 83858 | 7692024 | 86600 |
Azerbaijan | 86600 | 83858 | 13880 |
Bahamas | 13880 | 86600 | 760 |
Bahrain | 760 | 13880 | 147570 |
Bangladesh | 147570 | 760 | 430 |
Barbados | 430 | 147570 | 207600 |
Belarus | 207600 | 430 | 30528 |
Belgium | 30528 | 207600 | 22966 |
Belize | 22966 | 30528 | 112622 |
Benin | 112622 | 22966 | NULL |
Subscribe to my newsletter
Read articles from The Analyst Geek directly inside your inbox. Subscribe to the newsletter, and don't miss out.
Written by
The Analyst Geek
The Analyst Geek
I am a data enthusiast who finds joy in working with data, utilizing my skills to extract meaningful and valuable insights from it.