1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101
|
CREATE TEMPORARY TABLE raw_user_events AS SELECT get_json_object(event_data, '$.user_id') as user_id, get_json_object(event_data, '$.event_type') as event_type, get_json_object(event_data, '$.product_id') as product_id, get_json_object(event_data, '$.timestamp') as event_timestamp, get_json_object(event_data, '$.session_id') as session_id, CAST(get_json_object(event_data, '$.value') as DOUBLE) as event_value FROM raw_events WHERE dt >= '2023-01-01' AND get_json_object(event_data, '$.user_id') IS NOT NULL;
CREATE TABLE user_behavior_daily PARTITIONED BY (dt STRING) AS SELECT user_id, COUNT(DISTINCT session_id) as session_count, COUNT(CASE WHEN event_type = 'page_view' THEN 1 END) as page_views, COUNT(CASE WHEN event_type = 'add_to_cart' THEN 1 END) as add_to_cart_count, COUNT(CASE WHEN event_type = 'purchase' THEN 1 END) as purchase_count, SUM(CASE WHEN event_type = 'purchase' THEN event_value ELSE 0 END) as purchase_amount, COUNT(DISTINCT product_id) as unique_products_viewed, MIN(CAST(event_timestamp as TIMESTAMP)) as first_activity, MAX(CAST(event_timestamp as TIMESTAMP)) as last_activity, CAST(from_unixtime(unix_timestamp(event_timestamp, 'yyyy-MM-dd HH:mm:ss'), 'yyyy-MM-dd') as STRING) as dt FROM raw_user_events GROUP BY user_id, CAST(from_unixtime(unix_timestamp(event_timestamp, 'yyyy-MM-dd HH:mm:ss'), 'yyyy-MM-dd') as STRING);
CREATE TABLE user_segments AS SELECT user_id, AVG(session_count) as avg_daily_sessions, SUM(page_views) as total_page_views, SUM(purchase_count) as total_purchases, SUM(purchase_amount) as total_purchase_amount, COUNT(*) as active_days, CASE WHEN SUM(purchase_amount) > 1000 AND COUNT(*) > 20 THEN 'high_value' WHEN SUM(purchase_amount) > 200 AND COUNT(*) > 10 THEN 'medium_value' WHEN SUM(purchase_count) > 0 THEN 'low_value' ELSE 'browser_only' END as user_segment FROM user_behavior_daily WHERE dt >= '2023-01-01' AND dt < '2024-01-01' GROUP BY user_id;
CREATE TABLE conversion_funnel AS WITH funnel_events AS ( SELECT session_id, MAX(CASE WHEN event_type = 'page_view' THEN 1 ELSE 0 END) as has_page_view, MAX(CASE WHEN event_type = 'add_to_cart' THEN 1 ELSE 0 END) as has_add_to_cart, MAX(CASE WHEN event_type = 'checkout' THEN 1 ELSE 0 END) as has_checkout, MAX(CASE WHEN event_type = 'purchase' THEN 1 ELSE 0 END) as has_purchase FROM raw_user_events GROUP BY session_id ) SELECT 'page_view' as funnel_step, 1 as step_order, COUNT(*) as session_count FROM funnel_events WHERE has_page_view = 1
UNION ALL
SELECT 'add_to_cart' as funnel_step, 2 as step_order, COUNT(*) as session_count FROM funnel_events WHERE has_page_view = 1 AND has_add_to_cart = 1
UNION ALL
SELECT 'checkout' as funnel_step, 3 as step_order, COUNT(*) as session_count FROM funnel_events WHERE has_page_view = 1 AND has_add_to_cart = 1 AND has_checkout = 1
UNION ALL
SELECT 'purchase' as funnel_step, 4 as step_order, COUNT(*) as session_count FROM funnel_events WHERE has_page_view = 1 AND has_add_to_cart = 1 AND has_checkout = 1 AND has_purchase = 1
ORDER BY step_order;
|