SQL

SQL 4주차: Subquery, With, 문자열, Case, 그리고 문법 정리

수수 아보카도 2022. 11. 28. 00:35

SQL 4주차의 숙제는 이제까지 배운 문법을 정리하기!

일단 쿼리 작성법은 다음과 같다.

1) show tables로 어떤 테이블이 있는지 살펴보기
2) 제일 원하는 정보가 있을 것 같은 테이블에 select * from 테이블명 limit 10 쿼리 날려보기
3) 원하는 정보가 없으면 다른 테이블에도 2)를 해보기
4) 테이블을 찾았다! 범주를 나눠서 보고싶은 필드를 찾기
5) 범주별로 통계를 보고싶은 필드를 찾기
6) SQL 쿼리 작성하기!

1. where 절, select 쿼리문

like의 다양한 사용법

where email like 'a%': email 필드값이 a로 시작하는 모든 데이터
where email like '%a' email 필드값이 a로 끝나는 모든 데이터
where email like '%co%' email 필드값에 co를 포함하는 모든 데이터
where email like 'a%o' email 필드값이 a로 시작하고 o로 끝나는 모든 데이터

limit: 일부 데이터만 가져오기
select * from orders 
where payment_method = "kakaopay"
limit 5;​

distinct: 중복 데이터 제외하고 가져오기

select distinct(payment_method) from orders;​

count: 몇 개인지 세어보기

select count(*) from orders

SELECT count(distinct(name)) from users;

2. group by

동일한 범주를 갖는 데이터를 하나로 묶어서, 범주별 통계를 내주는 것.

select 범주별로 세어주고 싶은 필드명, count(*) from 테이블명
group by 범주별로 세어주고 싶은 필드명

#ex
select name, count(*) from users
group by name

#max
select week, max(likes) from checkins
group by week

#avg
select week, avg(likes) from checkins
group by week

#sum
select week, sum(likes) from checkins
group by week

3. order by

group by로 나눈 애들을 오름차순/내림차순desc으로 정렬

select name, count(*) from users
group by name
order by count(*) desc;

#ex
select name, count(*) from users
group by name
order by count(*);

#desc
select name, count(*) from users
group by name
order by count(*) desc;

4. where, group by 함께 사용하기

select payment_method, count(*) from orders
where email like '%naver.com' and course_title = '앱개발 종합반'
group by payment_method

5. Alias 별칭 기능

select payment_method, count(*) as cnt from orders o
where o.course_title = '앱개발 종합반'
group by payment_method

6. Join 여러 테이블을 연결하기- left join, inner join

select * from users u
left join point_users p
on u.user_id = p.user_id;

select * from users u
inner join point_users p
on u.user_id = p.user_id;

join을 앞에서 배운 문법들과 함께 쓰면 다음과 같다.

# 많은 포인트를 얻은 순서대로 유저 데이터 정렬해서 보기
select * from point_users p
inner join users u 
on p.user_id = u.user_id
order by p.point desc

# 네이버 이메일 사용하는 유저의 성씨별 주문건수 세어보기
select u.name, count(u.name) as count_name from orders o
inner join users u
on o.user_id = u.user_id 
where u.email like '%naver.com'
group by u.name

# 결제 수단 별 유저 포인트의 평균값
select o.payment_method, round(AVG(p.point)) from point_users p
inner join orders o 
on p.user_id = o.user_id 
group by o.payment_method

# 결제하고 시작하지 않은 유저들을 성씨별로 세어보기
select name, count(*) as cnt_name from enrolleds e
inner join users u
on e.user_id = u.user_id 
where is_registered = 0
group by name
order by cnt_name desc

# 과목 별로 시작하지 않은 유저들을 세어보기
select c.course_id, c.title, count(*) as cnt_notstart from courses c
inner join enrolleds e 
on c.course_id = e.course_id
where is_registered = 0
group by c.course_id

# 웹개발, 앱개발 종합반의 week 별 체크인 수 세어보기
select c1.title, c2.week, count(*) as cnt from courses c1
inner join checkins c2 on c1.course_id = c2.course_id
inner join orders o on c2.user_id = o.user_id
where o.created_at >= '2020-08-01'
group by c1.title, c2.week
order by c1.title, c2.week

# 앞에서 한 것을 바탕으로 8월 1일 이후에 구매한 고객들만 발라내기
select c1.title, c2.week, count(*) as cnt from courses c1
inner join checkins c2 on c1.course_id = c2.course_id
inner join orders o on c2.user_id = o.user_id
where o.created_at >= '2020-08-01'
group by c1.title, c2.week
order by c1.title, c2.week

# is NULL, is not NULL
select name, count(*) from users u
left join point_users pu on u.user_id = pu.user_id
where pu.point_user_id is NULL
group by name

select name, count(*) from users u
left join point_users pu on u.user_id = pu.user_id
where pu.point_user_id is not NULL
group by name

# 7월10일 ~ 7월19일에 가입한 고객 중, 포인트를 가진 고객의 숫자, 전체 숫자, 비율
select count(point_user_id) as pnt_user_cnt,
       count(*) as tot_user_cnt,
       round(count(point_user_id)/count(*),2) as ratio
  from users u
  left join point_users pu on u.user_id = pu.user_id
 where u.created_at between '2020-07-10' and '2020-07-20'
 
 # enrolled_id별 수강완료(done=1)한 강의 개수 세고, 완료한 강의 수가 많은 순서대로 정렬하기
 select  e.enrolled_id,
	     e.user_id,
	     count(*) as cnt
  from enrolleds e
 inner join enrolleds_detail ed on e.enrolled_id = ed.enrolled_id
 where ed.done = 1
 group by e.enrolled_id, e.user_id
 order by cnt desc

7. union all: select를 두 번 하는 것이 아니라 한 번에 모아서 볼 때

(
	select '7월' as month, c.title, c2.week, count(*) as cnt from checkins c2
	inner join courses c on c2.course_id = c.course_id
	inner join orders o on o.user_id = c2.user_id
	where o.created_at < '2020-08-01'
	group by c2.course_id, c2.week
  order by c2.course_id, c2.week
)
union all
(
	select '8월' as month, c.title, c2.week, count(*) as cnt from checkins c2
	inner join courses c on c2.course_id = c.course_id
	inner join orders o on o.user_id = c2.user_id
	where o.created_at > '2020-08-01'
	group by c2.course_id, c2.week
  order by c2.course_id, c2.week
)

8. Subquery

subquery는 쿼리 안에 쿼리를 쓰는 것이다. where, select, from 절에서 사용된다.

1) where에 들어갈 때: where 필드명 in (  subquery  )

select * from users u
where u.user_id in (select o.user_id from orders o 
					where o.payment_method = 'kakaopay');
                    
# 이씨 성을 가진 유저들의 평균 포인트보다 더 많은 포인트를 가지고 있는 데이터를 추출하기                    
select * from point_users pu 
where pu.point > 
     (select avg(pu2.point)
from point_users pu2
inner join users u 
on pu2.user_id = u.user_id 
where u.name = "이**");

2) select에 들어갈 때: select 필드명, 필드명, (  subquery  ) from ...

select c.checkin_id, c.user_id, c.likes, 
	(select avg(likes) from checkins c2
where c2.user_id = c.user_id) as avg_like_user
from checkins c;


# checkins 테이블에 course_id별 평균 likes수 필드 우측에 붙여보기
select checkin_id,
      c3.title, user_id,
      likes, 
      (select round(avg(c2.likes),1) from checkins c2
where c.course_id = c2.course_id) as course_avg
from checkins c
inner join courses c3 
on c.course_id = c3.course_id;

3) from에 들어갈 때: 먼저 서브쿼리의 select가 실행되고, 이걸 테이블처럼 여기고 inner join (    ) a on ... = ...

from에 들어가는 subquery 유형이 가장 많이 사용되므로 확실히 배우기.

select pu.user_id, a.avg_like, pu.point from point_users pu
inner join (
	select user_id, round(avg(likes),1) as avg_like from checkins
	group by user_id
) a on pu.user_id = a.user_id


# course_id별 like 개수에 전체 인원을 붙이기, ratio 나타내기, title나타내기
select c.title,
       a.cnt_checkins,
       b.cnt_total,
       (a.cnt_checkins/b.cnt_total) as ratio
from
(
	select course_id, count(distinct(user_id)) as cnt_checkins from checkins
	group by course_id
) a
inner join
(
	select course_id, count(*) as cnt_total from orders
	group by course_id 
) b on a.course_id = b.course_id
inner join courses c on a.course_id = c.course_id

9. With 절

table1, table2 등을 써서 쿼리절을 깔끔하게 나타낼 수 있다.

with table1 as (
	select course_id, count(distinct(user_id)) as cnt_checkins from checkins
	group by course_id
), table2 as (
	select course_id, count(*) as cnt_total from orders
	group by course_id
)

select c.title,
       a.cnt_checkins,
       b.cnt_total,
       (a.cnt_checkins/b.cnt_total) as ratio
from table1 a inner join table2 b on a.course_id = b.course_id
inner join courses c on a.course_id = c.course_id

10. 문자열 데이터

문자열 쪼개기 SUBSTRING-INDEX(email, "@", -1)

문자열 일부분만 출력하기 SUBSTRING(필드, 시작포인트, 글자 수)

select user_id, email, SUBSTRING_INDEX(email, '@', -1) from users

select order_no, created_at, substring(created_at,1,10) as date from orders

select substring(created_at,1,10) as date, count(*) as cnt_date from orders
group by date

11. case

경우에 따라 원하는 값을 새 필드에 출력한다.

select pu.point_user_id, pu.point,
case 
when pu.point > 10000 then '잘 하고 있어요!'
else '조금 더 달려주세요!'
END as '구분'
from point_users pu;

# 서브쿼리 이용해 group by로 통계내기
select level, count(*) as cnt from (
	select pu.point_user_id, pu.point,
	case 
	when pu.point > 10000 then '1만 이상'
	when pu.point > 5000 then '5천 이상'
	else '5천 미만'
	END as lv
	from point_users pu
) a
group by lv

# with절 함께 사용
with table1 as (
	select pu.point_user_id, pu.point,
	case 
	when pu.point > 10000 then '1만 이상'
	when pu.point > 5000 then '5천 이상'
	else '5천 미만'
	END as lv
	from point_users pu
)

select level, count(*) as cnt from table1
group by lv

 

12. 퀴즈들...

# 평균 이상 포인트를 가지고 있으면 '잘 하고 있어요' / 낮으면 '열심히 합시다!' 표시하기
select pu.point_user_id, pu.point,
	(case when pu.point > (select avg(pu2.point) from point_users pu2) then '잘 하고 있어요!'
	else '열심히 합시다!' end) as 'msg'
from point_users pu

# 이메일 도메인별 유저의 수 세어보기
select domain, count(*) as cnt from (
	select SUBSTRING_INDEX(email,'@',-1) as domain from users
) a
group by domain

#수강등록정보(enrolled_id)별 전체 강의 수와 들은 강의의 수 출력해보기
with table1 as (
	select enrolled_id, count(*) as done_cnt from enrolleds_detail
	where done = 1
	group by enrolled_id
), table2 as (
	select enrolled_id, count(*) as total_cnt from enrolleds_detail
	group by enrolled_id
)

select a.enrolled_id,
       a.done_cnt,
       b.total_cnt,
       round(a.done_cnt/b.total_cnt,2) as ratio
  from table1 a
 inner join table2 b on a.enrolled_id = b.enrolled_id
 
 # 위의 것 sum을 사용해서 더 간단한 코드로 만들기 (멀리보기)
 select enrolled_id,
       sum(done) as cnt_done,
       count(*) as cnt_total
from enrolleds_detail ed
group by enrolled_id

마지막 퀴즈가 대박 어려웠다 ㅠㅠ 준비과정이 많은데 한꺼번에 많이 풀어서...

어쨋든 SQL 끝! 뿌듯하다잉 ㅎㅎ