{"id":359,"date":"2024-04-20T01:18:14","date_gmt":"2024-04-19T16:18:14","guid":{"rendered":"https:\/\/chocottopro.com\/?p=359"},"modified":"2024-04-26T11:03:13","modified_gmt":"2024-04-26T02:03:13","slug":"%e3%80%90%e5%88%9d%e5%bf%83%e8%80%85%e5%bf%85%e8%a6%8b%e3%80%91pyspark%e3%83%9e%e3%82%b9%e3%82%bf%e3%83%bc%e8%ac%9b%e5%ba%a7-%e5%ae%9f%e5%8b%99%e3%81%a7%e4%bd%bf%e3%81%88%e3%82%8btips%e3%81%a8","status":"publish","type":"post","link":"https:\/\/chocottopro.com\/?p=359","title":{"rendered":"\u3010\u521d\u5fc3\u8005\u5fc5\u898b\u3011PySpark\u30de\u30b9\u30bf\u30fc\u8b1b\u5ea7 &#8211; \u5b9f\u52d9\u3067\u4f7f\u3048\u308bTips\u3068\u30e6\u30fc\u30b9\u30b1\u30fc\u30b9"},"content":{"rendered":"\n<div class=\"toc\">\n    <div id=\"toc_container\" class=\"sgb-toc--bullets js-smooth-scroll\" data-dialog-title=\"Table of Contents\">\n      <p class=\"toc_title\">\u76ee\u6b21 <\/p>\n      <ul class=\"toc_list\">  <li class=\"first\">    <a href=\"#i-0\">PySpark\u3068\u306f\uff1fPython\u304b\u3089Spark\u3092\u4f7f\u3046\u30e1\u30ea\u30c3\u30c8<\/a>    <ul class=\"menu_level_1\">      <li class=\"first\">        <a href=\"#i-1\">PySpark\u306e\u6982\u8981\u3068\u4ed5\u7d44\u307f<\/a>      <\/li>      <li>        <a href=\"#i-2\">PySpark\u3092\u4f7f\u30463\u3064\u306e\u30e1\u30ea\u30c3\u30c8<\/a>      <\/li>      <li 
class=\"last\">        <a href=\"#i-3\">PySpark\u3092\u4f7f\u3046\u3079\u304d\u696d\u52d9\u3068\u9069\u3055\u306a\u3044\u696d\u52d9<\/a>      <\/li>    <\/ul>  <\/li>  <li>    <a href=\"#i-4\">PySpark\u3092\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u3059\u308b3\u3064\u306e\u65b9\u6cd5<\/a>    <ul class=\"menu_level_1\">      <li class=\"first\">        <a href=\"#i-5\">\u30ed\u30fc\u30ab\u30eb\u74b0\u5883\u306bPySpark\u3092\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u3059\u308b\u624b\u9806<\/a>      <\/li>      <li>        <a href=\"#i-6\">Databricks Community Edition\u3092\u4f7f\u3046\u65b9\u6cd5<\/a>      <\/li>      <li class=\"last\">        <a href=\"#i-7\">Google Colab\u3067\u624b\u8efd\u306b\u59cb\u3081\u308b\u65b9\u6cd5<\/a>      <\/li>    <\/ul>  <\/li>  <li>    <a href=\"#i-8\">PySpark\u306e\u57fa\u672c\u7684\u306a\u4f7f\u3044\u65b9 \u2013 10\u306e\u57fa\u790e\u77e5\u8b58\u3068Tips<\/a>    <ul class=\"menu_level_1\">      <li class=\"first\">        <a href=\"#i-9\">SparkSession\u306e\u4f5c\u6210\u3068RDD\u306e\u57fa\u672c\u64cd\u4f5c<\/a>      <\/li>      <li>        <a href=\"#i-10\">DataFrame\u306e\u8aad\u307f\u8fbc\u307f\u3001\u64cd\u4f5c\u3001\u4fdd\u5b58<\/a>      <\/li>      <li>        <a href=\"#i-11\">Spark\u306e\u5185\u90e8\u52d5\u4f5c \u2013 \u30b9\u30c6\u30fc\u30b8\u3001\u30bf\u30b9\u30af\u3001\u30b7\u30e3\u30c3\u30d5\u30eb\u306b\u3064\u3044\u3066<\/a>      <\/li>      <li>        <a href=\"#i-12\">UDF\uff08\u30e6\u30fc\u30b6\u30fc\u5b9a\u7fa9\u95a2\u6570\uff09\u306e\u4f5c\u308a\u65b9<\/a>      <\/li>      <li>        <a href=\"#i-13\">Spark SQL\u3092\u4f7f\u3063\u305f\u30af\u30a8\u30ea\u5b9f\u884c<\/a>      <\/li>      <li>        <a href=\"#i-14\">MLlib\u3092\u4f7f\u3063\u305f\u6a5f\u68b0\u5b66\u7fd2\u306e\u57fa\u672c<\/a>      <\/li>      <li>        <a href=\"#i-15\">GraphFrames\u3092\u4f7f\u3063\u305f\u30b0\u30e9\u30d5\u5206\u6790<\/a>      <\/li>      <li>        <a 
href=\"#i-16\">\u30b9\u30c8\u30ea\u30fc\u30df\u30f3\u30b0\u30c7\u30fc\u30bf\u306e\u51e6\u7406\u65b9\u6cd5<\/a>      <\/li>      <li>        <a href=\"#i-17\">Spark\u30a2\u30d7\u30ea\u30b1\u30fc\u30b7\u30e7\u30f3\u306e\u30c1\u30e5\u30fc\u30cb\u30f3\u30b0\u65b9\u6cd5<\/a>      <\/li>      <li class=\"last\">        <a href=\"#i-18\">\u30ed\u30b0\u51fa\u529b\u3068\u30c7\u30d0\u30c3\u30b0\u65b9\u6cd5\u306e\u7406\u89e3<\/a>      <\/li>    <\/ul>  <\/li>  <li>    <a href=\"#i-19\">PySpark\u3092\u4f7f\u3063\u305f5\u3064\u306e\u5b9f\u8df5\u7684\u306a\u30e6\u30fc\u30b9\u30b1\u30fc\u30b9<\/a>    <ul class=\"menu_level_1\">      <li class=\"first\">        <a href=\"#i-20\">\u5927\u898f\u6a21\u30c7\u30fc\u30bf\u306eETL\u51e6\u7406<\/a>      <\/li>      <li>        <a href=\"#i-21\">\u30ed\u30b0\u30c7\u30fc\u30bf\u3092\u4f7f\u3063\u305f\u30e6\u30fc\u30b6\u30fc\u884c\u52d5\u5206\u6790<\/a>      <\/li>      <li>        <a href=\"#i-22\">\u6a5f\u68b0\u5b66\u7fd2\u3092\u4f7f\u3063\u305f\u5546\u54c1\u30ec\u30b3\u30e1\u30f3\u30c9<\/a>      <\/li>      <li>        <a href=\"#i-23\">\u30b9\u30c8\u30ea\u30fc\u30df\u30f3\u30b0\u30c7\u30fc\u30bf\u3092\u4f7f\u3063\u305f\u30ea\u30a2\u30eb\u30bf\u30a4\u30e0\u7570\u5e38\u691c\u77e5<\/a>      <\/li>      <li class=\"last\">        <a href=\"#i-24\">SNS\u30c7\u30fc\u30bf\u3092\u4f7f\u3063\u305f\u30a4\u30f3\u30d5\u30eb\u30a8\u30f3\u30b5\u30fc\u5206\u6790<\/a>      <\/li>    <\/ul>  <\/li>  <li>    <a href=\"#i-25\">PySpark\u306e\u30a8\u30b3\u30b7\u30b9\u30c6\u30e0\u3068\u904b\u7528\u3001\u9ad8\u5ea6\u306a\u8a71\u984c<\/a>    <ul class=\"menu_level_1\">      <li class=\"first\">        <a href=\"#i-26\">Jupyter\u3084Zeppelin\u306a\u3069\u306e\u30ce\u30fc\u30c8\u30d6\u30c3\u30af\u74b0\u5883\u3068\u306e\u9023\u643a\u65b9\u6cd5<\/a>      <\/li>      <li>        <a href=\"#i-27\">Airflow\u3092\u4f7f\u3063\u305f\u30ef\u30fc\u30af\u30d5\u30ed\u30fc\u7ba1\u7406<\/a>      <\/li>      <li>        <a 
href=\"#i-28\">Kafka\u306a\u3069\u4ed6\u306e\u30c7\u30fc\u30bf\u30d1\u30a4\u30d7\u30e9\u30a4\u30f3\u30c4\u30fc\u30eb\u3068\u306e\u9023\u643a<\/a>      <\/li>      <li>        <a href=\"#i-29\">Pandas\u3068PySpark\u3092\u4f7f\u3044\u5206\u3051\u308b\u57fa\u6e96<\/a>      <\/li>      <li>        <a href=\"#i-30\">PySpark\u3068Deep Learning\u306e\u7d44\u307f\u5408\u308f\u305b\u65b9<\/a>      <\/li>      <li class=\"last\">        <a href=\"#i-31\">PySpark\u3092\u30af\u30e9\u30a6\u30c9\u74b0\u5883\uff08AWS\u3001GCP\u3001Azure\uff09\u3067\u904b\u7528\u3059\u308b\u65b9\u6cd5<\/a>      <\/li>    <\/ul>  <\/li>  <li class=\"last\">    <a href=\"#i-32\">\u307e\u3068\u3081<\/a>  <\/li><\/ul>\n      \n    <\/div><\/div><h2 class=\"wp-block-heading\" id=\"i-0\">PySpark\u3068\u306f\uff1fPython\u304b\u3089Spark\u3092\u4f7f\u3046\u30e1\u30ea\u30c3\u30c8<\/h2>\n\n\n\n<p>PySpark \u306f\u3001Apache Spark \u306e Python API \u3067\u3042\u308a\u3001Python \u304b\u3089 Spark \u306e\u6a5f\u80fd\u3092\u5229\u7528\u3059\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002Spark \u306f\u3001\u5927\u898f\u6a21\u30c7\u30fc\u30bf\u51e6\u7406\u306e\u305f\u3081\u306e\u5206\u6563\u30b3\u30f3\u30d4\u30e5\u30fc\u30c6\u30a3\u30f3\u30b0\u30d5\u30ec\u30fc\u30e0\u30ef\u30fc\u30af\u3067\u3042\u308a\u3001PySpark \u3092\u4f7f\u3046\u3053\u3068\u3067\u3001Python \u30e6\u30fc\u30b6\u30fc\u306f Spark \u306e\u5f37\u529b\u306a\u51e6\u7406\u80fd\u529b\u3092\u6d3b\u7528\u3067\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-1\">PySpark\u306e\u6982\u8981\u3068\u4ed5\u7d44\u307f<\/h3>\n\n\n\n<p>PySpark \u306f\u3001Spark \u306e\u57fa\u672c\u7684\u306a\u30c7\u30fc\u30bf\u69cb\u9020\u3067\u3042\u308b RDD\uff08Resilient Distributed Datasets\uff09\u3084 DataFrame \u306a\u3069\u3092\u6271\u3046\u3053\u3068\u304c\u3067\u304d\u307e\u3059\uff08\u578b\u4ed8\u304d\u306e Dataset API \u306f Scala \u3068 Java \u5c02\u7528\u3067\u3042\u308a\u3001PySpark \u3067\u306f\u5229\u7528\u3067\u304d\u307e\u305b\u3093\uff09\u3002PySpark \u30d7\u30ed\u30b0\u30e9\u30e0\u306f\u3001Python \u304b\u3089 JVM \u3067\u52d5\u4f5c\u3059\u308b Spark 
\u306b\u30b8\u30e7\u30d6\u3092\u9001\u4fe1\u3057\u3001Spark \u306e\u30a8\u30b0\u30bc\u30ad\u30e5\u30fc\u30bf\u4e0a\u3067 Python \u30d7\u30ed\u30bb\u30b9\u304c\u8d77\u52d5\u3055\u308c\u308b\u3053\u3068\u3067\u3001\u5206\u6563\u51e6\u7406\u304c\u5b9f\u884c\u3055\u308c\u307e\u3059\u3002\u305f\u3060\u3057\u3001Python \u3068 JVM \u9593\u3067\u30c7\u30fc\u30bf\u306e\u30b7\u30ea\u30a2\u30e9\u30a4\u30ba\u30fb\u30c7\u30b7\u30ea\u30a2\u30e9\u30a4\u30ba\u304c\u767a\u751f\u3059\u308b\u305f\u3081\u3001\u82e5\u5e72\u306e\u30aa\u30fc\u30d0\u30fc\u30d8\u30c3\u30c9\u304c\u5b58\u5728\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-2\">PySpark\u3092\u4f7f\u30463\u3064\u306e\u30e1\u30ea\u30c3\u30c8<\/h3>\n\n\n\n<p>PySpark \u3092\u4f7f\u3046\u30e1\u30ea\u30c3\u30c8\u306f\u4ee5\u4e0b\u306e3\u3064\u304c\u6319\u3052\u3089\u308c\u307e\u3059\u3002<\/p>\n\n\n\n<ol class=\"wp-block-list\">\n<li><strong>Python\u30a8\u30b3\u30b7\u30b9\u30c6\u30e0\u3068\u306e\u89aa\u548c\u6027<\/strong>: PySpark \u3092\u4f7f\u3046\u3053\u3068\u3067\u3001Python \u306e\u8c4a\u5bcc\u306a\u30e9\u30a4\u30d6\u30e9\u30ea\u3084\u30c4\u30fc\u30eb\u3092\u6d3b\u7528\u3057\u306a\u304c\u3089\u3001Spark \u306e\u51e6\u7406\u80fd\u529b\u3092\u5229\u7528\u3067\u304d\u307e\u3059\u3002<\/li>\n\n\n\n<li><strong>Pandas\u30e6\u30fc\u30b6\u30fc\u306b\u3068\u3063\u3066\u306e\u4f7f\u3044\u3084\u3059\u3055<\/strong>: PySpark \u306e DataFrame \u306f\u3001Pandas \u306e DataFrame \u3068\u4f3c\u305f\u64cd\u4f5c\u611f\u3092\u63d0\u4f9b\u3059\u308b\u305f\u3081\u3001Pandas \u30e6\u30fc\u30b6\u30fc\u306b\u3068\u3063\u3066\u5b66\u7fd2\u30b3\u30b9\u30c8\u304c\u4f4e\u304f\u306a\u308a\u307e\u3059\u3002<\/li>\n\n\n\n<li><strong>\u5b66\u7fd2\u30b3\u30b9\u30c8\u306e\u4f4e\u3055\u3068Python\u30e9\u30a4\u30d6\u30e9\u30ea\u306e\u6d3b\u7528<\/strong>: Scala \u306b\u6bd4\u3079\u3066\u3001Python 
\u306f\u4e00\u822c\u7684\u306b\u5b66\u7fd2\u30b3\u30b9\u30c8\u304c\u4f4e\u3044\u3068\u8a00\u308f\u308c\u3066\u3044\u307e\u3059\u3002\u307e\u305f\u3001Python \u306e\u8c4a\u5bcc\u306a\u30e9\u30a4\u30d6\u30e9\u30ea\u3092\u6d3b\u7528\u3059\u308b\u3053\u3068\u3067\u3001\u30c7\u30fc\u30bf\u51e6\u7406\u3084\u5206\u6790\u306e\u751f\u7523\u6027\u3092\u9ad8\u3081\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002<\/li>\n<\/ol>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-3\">PySpark\u3092\u4f7f\u3046\u3079\u304d\u696d\u52d9\u3068\u9069\u3055\u306a\u3044\u696d\u52d9<\/h3>\n\n\n\n<p>PySpark \u306f\u3001\u4ee5\u4e0b\u306e\u3088\u3046\u306a\u696d\u52d9\u306b\u9069\u3057\u3066\u3044\u307e\u3059\u3002<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u5927\u898f\u6a21\u30c7\u30fc\u30bf\u306e\u524d\u51e6\u7406\u3084ETL\u51e6\u7406<\/li>\n\n\n\n<li>\u6a5f\u68b0\u5b66\u7fd2\u3084\u30c7\u30fc\u30bf\u5206\u6790\u306e\u30ef\u30fc\u30af\u30ed\u30fc\u30c9<\/li>\n\n\n\n<li>\u30b9\u30c8\u30ea\u30fc\u30df\u30f3\u30b0\u30c7\u30fc\u30bf\u306e\u30ea\u30a2\u30eb\u30bf\u30a4\u30e0\u51e6\u7406<\/li>\n<\/ul>\n\n\n\n<p>\u4e00\u65b9\u3067\u3001\u4ee5\u4e0b\u306e\u3088\u3046\u306a\u696d\u52d9\u306b\u306f\u9069\u3055\u306a\u3044\u5834\u5408\u304c\u3042\u308a\u307e\u3059\u3002<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u30b7\u30ea\u30a2\u30e9\u30a4\u30ba\u30fb\u30c7\u30b7\u30ea\u30a2\u30e9\u30a4\u30ba\u306e\u30aa\u30fc\u30d0\u30fc\u30d8\u30c3\u30c9\u304c\u5927\u304d\u304f\u306a\u308b\u3088\u3046\u306a\u51e6\u7406<\/li>\n\n\n\n<li>\u975e\u5e38\u306b\u4f4e\u30ec\u30a4\u30c6\u30f3\u30b7\u304c\u6c42\u3081\u3089\u308c\u308b\u51e6\u7406<\/li>\n\n\n\n<li>Scala \u3084 Java \u307b\u3069\u306e\u67d4\u8edf\u6027\u3084\u6700\u9069\u5316\u304c\u5fc5\u8981\u306a\u51e6\u7406<\/li>\n<\/ul>\n\n\n\n<p>PySpark 
\u306f\u3001Python\u306e\u5229\u4fbf\u6027\u3068Spark\u306e\u51e6\u7406\u80fd\u529b\u3092\u517c\u306d\u5099\u3048\u305f\u30c4\u30fc\u30eb\u3067\u3042\u308a\u3001\u9069\u6750\u9069\u6240\u3067\u4f7f\u3046\u3053\u3068\u3067\u3001\u30c7\u30fc\u30bf\u51e6\u7406\u3084\u5206\u6790\u306e\u751f\u7523\u6027\u3092\u5927\u304d\u304f\u5411\u4e0a\u3055\u305b\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002\u6b21\u7ae0\u3067\u306f\u3001PySpark \u306e\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u65b9\u6cd5\u306b\u3064\u3044\u3066\u89e3\u8aac\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<h2 class=\"wp-block-heading\" id=\"i-4\">PySpark\u3092\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u3059\u308b3\u3064\u306e\u65b9\u6cd5<\/h2>\n\n\n\n<p>PySpark \u3092\u4f7f\u3044\u59cb\u3081\u308b\u306b\u306f\u3001\u74b0\u5883\u306e\u30bb\u30c3\u30c8\u30a2\u30c3\u30d7\u304c\u5fc5\u8981\u3067\u3059\u3002\u3053\u3053\u3067\u306f\u3001PySpark\u3092\u5c0e\u5165\u3059\u308b3\u3064\u306e\u65b9\u6cd5\u3092\u7d39\u4ecb\u3057\u307e\u3059\u3002\u76ee\u7684\u3084\u30b9\u30ad\u30eb\u30ec\u30d9\u30eb\u306b\u5fdc\u3058\u3066\u3001\u6700\u9069\u306a\u65b9\u6cd5\u3092\u9078\u629e\u3057\u3066\u304f\u3060\u3055\u3044\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-5\">\u30ed\u30fc\u30ab\u30eb\u74b0\u5883\u306bPySpark\u3092\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u3059\u308b\u624b\u9806<\/h3>\n\n\n\n<p>\u30ed\u30fc\u30ab\u30eb\u74b0\u5883\u306bPySpark\u3092\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u3059\u308b\u3053\u3068\u3067\u3001\u81ea\u5206\u306e\u30de\u30b7\u30f3\u3067PySpark\u3092\u5b9f\u884c\u3067\u304d\u307e\u3059\u3002\u4ee5\u4e0b\u306e\u624b\u9806\u306b\u5f93\u3063\u3066\u3001PySpark\u3092\u30bb\u30c3\u30c8\u30a2\u30c3\u30d7\u3057\u307e\u3057\u3087\u3046\u3002<\/p>\n\n\n\n<ol class=\"wp-block-list\">\n<li>Java Development Kit (JDK)\u3092\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u3057\u307e\u3059\u3002<\/li>\n\n\n\n<li>Apache 
Spark\u3092\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u3057\u3001\u89e3\u51cd\u3057\u307e\u3059\u3002<\/li>\n\n\n\n<li>\u74b0\u5883\u5909\u6570 SPARK_HOME \u3068 PYTHONPATH \u3092\u8a2d\u5b9a\u3057\u307e\u3059\u3002<\/li>\n\n\n\n<li><code>pip install pyspark<\/code> \u30b3\u30de\u30f3\u30c9\u3092\u5b9f\u884c\u3057\u3001PySpark\u30e9\u30a4\u30d6\u30e9\u30ea\u3092\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u3057\u307e\u3059\u3002<\/li>\n<\/ol>\n\n\n\n<p>\u30ed\u30fc\u30ab\u30eb\u74b0\u5883\u3067\u306e\u30bb\u30c3\u30c8\u30a2\u30c3\u30d7\u306f\u3001\u74b0\u5883\u3092\u81ea\u7531\u306b\u30ab\u30b9\u30bf\u30de\u30a4\u30ba\u3067\u304d\u308b\u30e1\u30ea\u30c3\u30c8\u304c\u3042\u308a\u307e\u3059\u304c\u3001\u74b0\u5883\u69cb\u7bc9\u306e\u624b\u9593\u304c\u304b\u304b\u308a\u307e\u3059\u3002Spark\u306e\u4ed5\u7d44\u307f\u3092\u6df1\u304f\u7406\u89e3\u3057\u305f\u3044\u65b9\u306b\u9069\u3057\u3066\u3044\u307e\u3059\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-6\">Databricks Community Edition\u3092\u4f7f\u3046\u65b9\u6cd5<\/h3>\n\n\n\n<p>Databricks Community Edition\u306f\u3001Web\u30d6\u30e9\u30a6\u30b6\u4e0a\u3067\u5229\u7528\u3067\u304d\u308b\u7121\u6599\u306eSpark\u74b0\u5883\u3067\u3059\u3002\u4ee5\u4e0b\u306e\u7279\u5fb4\u304c\u3042\u308a\u307e\u3059\u3002<\/p>\n\n\n\n<ul 
class=\"wp-block-list\">\n<li>\u30ce\u30fc\u30c8\u30d6\u30c3\u30af\u5f62\u5f0f\u3067PySpark\u30b3\u30fc\u30c9\u3092\u8a18\u8ff0\u30fb\u5b9f\u884c\u3067\u304d\u307e\u3059\u3002<\/li>\n\n\n\n<li>\u30af\u30e9\u30b9\u30bf\u7ba1\u7406\u3084\u30e9\u30a4\u30d6\u30e9\u30ea\u306e\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u304c\u4e0d\u8981\u3067\u3059\u3002<\/li>\n\n\n\n<li>\u4e00\u90e8\u306e\u6a5f\u80fd\u306b\u5236\u9650\u304c\u3042\u308a\u307e\u3059\uff08\u30af\u30e9\u30b9\u30bf\u30b5\u30a4\u30ba\u3001\u5b9f\u884c\u6642\u9593\u306a\u3069\uff09\u3002<\/li>\n<\/ul>\n\n\n\n<p>Databricks\u306f\u3001\u74b0\u5883\u69cb\u7bc9\u306e\u624b\u9593\u306a\u304f\u3001\u624b\u8efd\u306bPySpark\u3092\u8a66\u3059\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002Spark\u306e\u57fa\u672c\u7684\u306a\u4f7f\u3044\u65b9\u3092\u5b66\u3073\u305f\u3044\u65b9\u306b\u304a\u3059\u3059\u3081\u3067\u3059\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-7\">Google Colab\u3067\u624b\u8efd\u306b\u59cb\u3081\u308b\u65b9\u6cd5<\/h3>\n\n\n\n<p>Google Colaboratory\uff08Colab\uff09\u306f\u3001Google\u304c\u63d0\u4f9b\u3059\u308bJupyter Notebook\u306e\u7121\u6599\u30b5\u30fc\u30d3\u30b9\u3067\u3059\u3002Colab\u3092\u4f7f\u3048\u3070\u3001PySpark\u3092\u8ffd\u52a0\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u3059\u308b\u3053\u3068\u306a\u304f\u3001\u3059\u3050\u306b\u4f7f\u3044\u59cb\u3081\u3089\u308c\u307e\u3059\u3002<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u30ce\u30fc\u30c8\u30d6\u30c3\u30af\u5f62\u5f0f\u3067PySpark\u3092\u8a18\u8ff0\u30fb\u5b9f\u884c\u3067\u304d\u307e\u3059\u3002<\/li>\n\n\n\n<li>Google Drive\u3068\u9023\u643a\u3057\u3066\u30c7\u30fc\u30bf\u306e\u8aad\u307f\u66f8\u304d\u304c\u53ef\u80fd\u3067\u3059\u3002<\/li>\n\n\n\n<li>GPU\u3084TPU\u3092\u5229\u7528\u3067\u304d\u308b\u305f\u3081\u3001\u6a5f\u68b0\u5b66\u7fd2\u306e\u30bf\u30b9\u30af\u306b\u3082\u9069\u3057\u3066\u3044\u307e\u3059\u3002<\/li>\n<\/ul>\n\n\n\n<p>Google 
Colab\u306f\u3001\u74b0\u5883\u69cb\u7bc9\u4e0d\u8981\u3067\u3059\u3050\u306bPySpark\u3092\u8a66\u305b\u308b\u305f\u3081\u3001\u624b\u8efd\u306b\u59cb\u3081\u305f\u3044\u65b9\u306b\u6700\u9069\u3067\u3059\u3002<\/p>\n\n\n\n<p>PySpark\u3092\u5c0e\u5165\u3059\u308b\u65b9\u6cd5\u306f\u3001\u76ee\u7684\u3084\u30b9\u30ad\u30eb\u30ec\u30d9\u30eb\u306b\u5fdc\u3058\u3066\u9078\u629e\u80a2\u304c\u3042\u308a\u307e\u3059\u3002\u30ed\u30fc\u30ab\u30eb\u74b0\u5883\u3067\u3058\u3063\u304f\u308a\u5b66\u7fd2\u3057\u305f\u3044\u65b9\u306f\u3001\u624b\u9806\u306b\u5f93\u3063\u3066PySpark\u3092\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u3057\u307e\u3057\u3087\u3046\u3002\u624b\u8efd\u306b\u8a66\u3057\u3066\u307f\u305f\u3044\u65b9\u306f\u3001Databricks Community Edition\u3084Google Colab\u3092\u6d3b\u7528\u3057\u3066\u304f\u3060\u3055\u3044\u3002\u6b21\u7ae0\u3067\u306f\u3001PySpark\u306e\u57fa\u672c\u7684\u306a\u4f7f\u3044\u65b9\u306b\u3064\u3044\u3066\u89e3\u8aac\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<h2 class=\"wp-block-heading\" id=\"i-8\">PySpark\u306e\u57fa\u672c\u7684\u306a\u4f7f\u3044\u65b9 \u2013 10\u306e\u57fa\u790e\u77e5\u8b58\u3068Tips<\/h2>\n\n\n\n<p>PySpark\u3092\u52b9\u679c\u7684\u306b\u4f7f\u3044\u3053\u306a\u3059\u306b\u306f\u3001\u57fa\u672c\u7684\u306a\u4f7f\u3044\u65b9\u3092\u7406\u89e3\u3059\u308b\u3053\u3068\u304c\u91cd\u8981\u3067\u3059\u3002\u3053\u3053\u3067\u306f\u3001PySpark\u3092\u6271\u3046\u4e0a\u3067\u5fc5\u8981\u306a10\u306e\u57fa\u790e\u77e5\u8b58\u3068Tips\u3092\u7d39\u4ecb\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-9\">SparkSession\u306e\u4f5c\u6210\u3068RDD\u306e\u57fa\u672c\u64cd\u4f5c<\/h3>\n\n\n\n<p>PySpark \u30a2\u30d7\u30ea\u30b1\u30fc\u30b7\u30e7\u30f3\u3092\u958b\u767a\u3059\u308b\u969b\u306b\u306f\u3001\u307e\u305a SparkSession \u3092\u4f5c\u6210\u3059\u308b\u5fc5\u8981\u304c\u3042\u308a\u307e\u3059\u3002SparkSession \u306f\u3001Spark 
\u30a2\u30d7\u30ea\u30b1\u30fc\u30b7\u30e7\u30f3\u306e\u30a8\u30f3\u30c8\u30ea\u30fc\u30dd\u30a4\u30f3\u30c8\u3067\u3042\u308a\u3001Spark \u306e\u8a2d\u5b9a\u3084\u64cd\u4f5c\u3092\u884c\u3046\u305f\u3081\u306e\u8d77\u70b9\u3068\u306a\u308a\u307e\u3059\u3002<\/p>\n\n\n\n<p>RDD\uff08Resilient Distributed Dataset\uff09\u306f\u3001Spark \u306e\u57fa\u672c\u7684\u306a\u30c7\u30fc\u30bf\u69cb\u9020\u3067\u3059\u3002RDD \u306f\u3001\u8907\u6570\u306e\u30ce\u30fc\u30c9\u306b\u5206\u6563\u3055\u308c\u305f\u30c7\u30fc\u30bf\u306e\u96c6\u5408\u3067\u3042\u308a\u3001map\u3001filter\u3001reduce \u306a\u3069\u306e\u64cd\u4f5c\u3092\u884c\u3046\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002\u3053\u308c\u3089\u306e\u64cd\u4f5c\u306f\u3001\u5404\u30ce\u30fc\u30c9\u3067\u4e26\u5217\u306b\u5b9f\u884c\u3055\u308c\u308b\u305f\u3081\u3001\u5927\u898f\u6a21\u30c7\u30fc\u30bf\u306e\u51e6\u7406\u306b\u9069\u3057\u3066\u3044\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">from pyspark.sql import SparkSession\n\nspark = SparkSession.builder \\\n    .appName(\"MyApp\") \\\n    .getOrCreate()\n\nrdd = spark.sparkContext.parallelize([1, 2, 3, 4, 5])\nsquared_rdd = rdd.map(lambda x: x ** 2)<\/pre>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-10\">DataFrame\u306e\u8aad\u307f\u8fbc\u307f\u3001\u64cd\u4f5c\u3001\u4fdd\u5b58<\/h3>\n\n\n\n<p>DataFrame \u306f\u3001RDD \u3092\u69cb\u9020\u5316\u30c7\u30fc\u30bf\u3068\u3057\u3066\u6271\u3046\u305f\u3081\u306e\u62bd\u8c61\u5316\u3057\u305f\u30c7\u30fc\u30bf\u69cb\u9020\u3067\u3059\u3002DataFrame \u3092\u4f7f\u3046\u3053\u3068\u3067\u3001SQL\u306b\u4f3c\u305f\u64cd\u4f5c\u3092\u884c\u3046\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<p>DataFrame\u306f\u3001CSV\u3001JSON\u3001Parquet 
\u306a\u3069\u306e\u69d8\u3005\u306a\u30d5\u30a9\u30fc\u30de\u30c3\u30c8\u3067\u8aad\u307f\u8fbc\u3080\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002\u307e\u305f\u3001select\u3001filter\u3001groupBy \u306a\u3069\u306e\u64cd\u4f5c\u3092\u4f7f\u3063\u3066\u3001\u30c7\u30fc\u30bf\u3092\u52a0\u5de5\u3059\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002\u51e6\u7406\u5f8c\u306eDataFrame\u306f\u3001\u69d8\u3005\u306a\u30d5\u30a9\u30fc\u30de\u30c3\u30c8\u3067\u4fdd\u5b58\u3059\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">df = spark.read.csv(\"data.csv\", header=True, inferSchema=True)\nfiltered_df = df.filter(df[\"age\"] &gt; 18)\ngrouped_df = filtered_df.groupBy(\"department\").avg(\"salary\")\ngrouped_df.write.parquet(\"output\")<\/pre>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-11\">Spark\u306e\u5185\u90e8\u52d5\u4f5c \u2013 \u30b9\u30c6\u30fc\u30b8\u3001\u30bf\u30b9\u30af\u3001\u30b7\u30e3\u30c3\u30d5\u30eb\u306b\u3064\u3044\u3066<\/h3>\n\n\n\n<p>Spark 
\u30a2\u30d7\u30ea\u30b1\u30fc\u30b7\u30e7\u30f3\u306f\u3001\u8907\u6570\u306e\u30b9\u30c6\u30fc\u30b8\u306b\u5206\u5272\u3055\u308c\u3001\u5404\u30b9\u30c6\u30fc\u30b8\u306f\u8907\u6570\u306e\u30bf\u30b9\u30af\u3067\u69cb\u6210\u3055\u308c\u3066\u3044\u307e\u3059\u3002\u5404\u30bf\u30b9\u30af\u306f\u3001\u30af\u30e9\u30b9\u30bf\u5185\u306e\u7570\u306a\u308b\u30ce\u30fc\u30c9\u3067\u4e26\u5217\u306b\u5b9f\u884c\u3055\u308c\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u30b7\u30e3\u30c3\u30d5\u30eb\u306f\u3001\u30c7\u30fc\u30bf\u3092\u518d\u5206\u914d\u3059\u308b\u305f\u3081\u306e\u64cd\u4f5c\u3067\u3059\u3002\u30b7\u30e3\u30c3\u30d5\u30eb\u304c\u767a\u751f\u3059\u308b\u3068\u3001\u30c7\u30fc\u30bf\u304c\u30cd\u30c3\u30c8\u30ef\u30fc\u30af\u3092\u4ecb\u3057\u3066\u8ee2\u9001\u3055\u308c\u308b\u305f\u3081\u3001\u30d1\u30d5\u30a9\u30fc\u30de\u30f3\u30b9\u306b\u5f71\u97ff\u3092\u4e0e\u3048\u308b\u53ef\u80fd\u6027\u304c\u3042\u308a\u307e\u3059\u3002\u30b7\u30e3\u30c3\u30d5\u30eb\u306e\u767a\u751f\u3092\u6291\u3048\u308b\u3053\u3068\u3067\u3001\u30a2\u30d7\u30ea\u30b1\u30fc\u30b7\u30e7\u30f3\u306e\u30d1\u30d5\u30a9\u30fc\u30de\u30f3\u30b9\u3092\u5411\u4e0a\u3055\u305b\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-12\">UDF\uff08\u30e6\u30fc\u30b6\u30fc\u5b9a\u7fa9\u95a2\u6570\uff09\u306e\u4f5c\u308a\u65b9<\/h3>\n\n\n\n<p>UDF\uff08User-Defined Function\uff09\u306f\u3001\u30e6\u30fc\u30b6\u30fc\u304c\u5b9a\u7fa9\u3057\u305f\u30ab\u30b9\u30bf\u30e0\u95a2\u6570\u3067\u3059\u3002UDF \u3092\u4f7f\u3046\u3053\u3068\u3067\u3001DataFrame\u306e\u5404\u884c\u306b\u5bfe\u3057\u3066\u4efb\u610f\u306e\u51e6\u7406\u3092\u884c\u3046\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">from 
pyspark.sql.functions import udf\nfrom pyspark.sql.types import IntegerType\n\ndef square(x):\n    return x ** 2\n\nsquare_udf = udf(square, IntegerType())\ndf = df.withColumn(\"squared_value\", square_udf(df[\"value\"]))<\/pre>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-13\">Spark SQL\u3092\u4f7f\u3063\u305f\u30af\u30a8\u30ea\u5b9f\u884c<\/h3>\n\n\n\n<p>Spark SQL \u3092\u4f7f\u3046\u3053\u3068\u3067\u3001DataFrame\u306b\u5bfe\u3057\u3066SQL\u30af\u30a8\u30ea\u3092\u5b9f\u884c\u3059\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002Spark SQL\u306f\u3001Hive\u4e92\u63db\u306e\u30af\u30a8\u30ea\u8a00\u8a9e\u3067\u3042\u308a\u3001SQL\u306b\u6163\u308c\u89aa\u3057\u3093\u3067\u3044\u308b\u30e6\u30fc\u30b6\u30fc\u306b\u3068\u3063\u3066\u4f7f\u3044\u3084\u3059\u3044\u30a4\u30f3\u30bf\u30fc\u30d5\u30a7\u30fc\u30b9\u3092\u63d0\u4f9b\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">df.createOrReplaceTempView(\"table\")\nresult = spark.sql(\"SELECT department, AVG(salary) AS avg_salary FROM table GROUP BY department\")<\/pre>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-14\">MLlib\u3092\u4f7f\u3063\u305f\u6a5f\u68b0\u5b66\u7fd2\u306e\u57fa\u672c<\/h3>\n\n\n\n<p>MLlib \u306f\u3001Spark \u4e0a\u3067\u6a5f\u68b0\u5b66\u7fd2\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0\u3092\u5b9f\u88c5\u3059\u308b\u305f\u3081\u306e\u30e9\u30a4\u30d6\u30e9\u30ea\u3067\u3059\u3002\u5206\u985e\u3001\u56de\u5e30\u3001\u30af\u30e9\u30b9\u30bf\u30ea\u30f3\u30b0\u3001\u5354\u8abf\u30d5\u30a3\u30eb\u30bf\u30ea\u30f3\u30b0\u306a\u3069\u3001\u69d8\u3005\u306a\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0\u304c\u63d0\u4f9b\u3055\u308c\u3066\u3044\u307e\u3059\u3002<\/p>\n\n\n\n<p>MLlib 
\u3092\u4f7f\u3046\u969b\u306b\u306f\u3001\u30c7\u30fc\u30bf\u306e\u524d\u51e6\u7406\u3001\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0\u306e\u9078\u629e\u3001\u30d1\u30a4\u30d7\u30e9\u30a4\u30f3\u306e\u69cb\u7bc9\u3001\u30e2\u30c7\u30eb\u306e\u8a55\u4fa1\u3068\u3044\u3063\u305f\u4e00\u9023\u306e\u6d41\u308c\u3092\u7406\u89e3\u3059\u308b\u3053\u3068\u304c\u91cd\u8981\u3067\u3059\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-15\">GraphFrames\u3092\u4f7f\u3063\u305f\u30b0\u30e9\u30d5\u5206\u6790<\/h3>\n\n\n\n<p>GraphFrames \u306f\u3001Spark\u4e0a\u3067\u30b0\u30e9\u30d5\u30c7\u30fc\u30bf\u3092\u51e6\u7406\u3059\u308b\u305f\u3081\u306e\u30e9\u30a4\u30d6\u30e9\u30ea\u3067\u3059\u3002GraphFrames\u3092\u4f7f\u3046\u3053\u3068\u3067\u3001PageRank\u306a\u3069\u306e\u69d8\u3005\u306a\u30b0\u30e9\u30d5\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0\u3092\u5b9f\u884c\u3059\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">from graphframes import GraphFrame\n\nvertices = spark.createDataFrame([(\"1\", \"Alice\"), (\"2\", \"Bob\"), (\"3\", \"Charlie\")], [\"id\", \"name\"])\nedges = spark.createDataFrame([(\"1\", \"2\"), (\"2\", \"3\"), (\"3\", \"1\")], [\"src\", \"dst\"])\n\ngraph = GraphFrame(vertices, edges)\npagerank = graph.pageRank(resetProbability=0.15, maxIter=10)<\/pre>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-16\">\u30b9\u30c8\u30ea\u30fc\u30df\u30f3\u30b0\u30c7\u30fc\u30bf\u306e\u51e6\u7406\u65b9\u6cd5<\/h3>\n\n\n\n<p>PySpark 
\u3092\u4f7f\u3063\u3066\u3001\u30ea\u30a2\u30eb\u30bf\u30a4\u30e0\u306b\u30c7\u30fc\u30bf\u3092\u51e6\u7406\u3059\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002\u30b9\u30c8\u30ea\u30fc\u30df\u30f3\u30b0\u30c7\u30fc\u30bf\u3092\u51e6\u7406\u3059\u308b\u969b\u306b\u306f\u3001DStream\uff08Discretized Stream\uff09\u3084 Structured Streaming \u3092\u4f7f\u3044\u307e\u3059\u3002<\/p>\n\n\n\n<p>DStream \u306f\u3001RDD \u3092\u6642\u9593\u7684\u306b\u9023\u7d9a\u3057\u305f\u3082\u306e\u3068\u3057\u3066\u6271\u3046\u30c7\u30fc\u30bf\u69cb\u9020\u3067\u3059\u3002Structured Streaming \u306f\u3001DataFrame \u3092\u6642\u9593\u7684\u306b\u9023\u7d9a\u3057\u305f\u3082\u306e\u3068\u3057\u3066\u6271\u3046\u305f\u3081\u306e\u9ad8\u30ec\u30d9\u30eb\u306aAPI\u3067\u3059\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-17\">Spark\u30a2\u30d7\u30ea\u30b1\u30fc\u30b7\u30e7\u30f3\u306e\u30c1\u30e5\u30fc\u30cb\u30f3\u30b0\u65b9\u6cd5<\/h3>\n\n\n\n<p>Spark \u30a2\u30d7\u30ea\u30b1\u30fc\u30b7\u30e7\u30f3\u306e\u30d1\u30d5\u30a9\u30fc\u30de\u30f3\u30b9\u3092\u6700\u9069\u5316\u3059\u308b\u306b\u306f\u3001\u69d8\u3005\u306a\u30c1\u30e5\u30fc\u30cb\u30f3\u30b0\u304c\u5fc5\u8981\u3067\u3059\u3002\u4f8b\u3048\u3070\u3001\u30d1\u30fc\u30c6\u30a3\u30b7\u30e7\u30f3\u6570\u306e\u8abf\u6574\u3001\u30e1\u30e2\u30ea\u5272\u308a\u5f53\u3066\u306e\u6700\u9069\u5316\u3001\u30b7\u30ea\u30a2\u30e9\u30a4\u30ba\u65b9\u5f0f\u306e\u9078\u629e\u306a\u3069\u304c\u3042\u308a\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u30a2\u30d7\u30ea\u30b1\u30fc\u30b7\u30e7\u30f3\u306e\u30dc\u30c8\u30eb\u30cd\u30c3\u30af\u3092\u7279\u5b9a\u3057\u3001\u9069\u5207\u306a\u30c1\u30e5\u30fc\u30cb\u30f3\u30b0\u3092\u884c\u3046\u3053\u3068\u3067\u3001\u30d1\u30d5\u30a9\u30fc\u30de\u30f3\u30b9\u3092\u5927\u5e45\u306b\u5411\u4e0a\u3055\u305b\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" 
id=\"i-18\">\u30ed\u30b0\u51fa\u529b\u3068\u30c7\u30d0\u30c3\u30b0\u65b9\u6cd5\u306e\u7406\u89e3<\/h3>\n\n\n\n<p>Spark \u30a2\u30d7\u30ea\u30b1\u30fc\u30b7\u30e7\u30f3\u306e\u958b\u767a\u3067\u306f\u3001\u30ed\u30b0\u51fa\u529b\u3068\u30c7\u30d0\u30c3\u30b0\u65b9\u6cd5\u3092\u7406\u89e3\u3059\u308b\u3053\u3068\u304c\u91cd\u8981\u3067\u3059\u3002\u30ed\u30b0\u30ec\u30d9\u30eb\u3092\u9069\u5207\u306b\u8a2d\u5b9a\u3059\u308b\u3053\u3068\u3067\u3001\u554f\u984c\u306e\u539f\u56e0\u3092\u7279\u5b9a\u3057\u3084\u3059\u304f\u306a\u308a\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u307e\u305f\u3001Spark UI\u3092\u4f7f\u3046\u3053\u3068\u3067\u3001\u30a2\u30d7\u30ea\u30b1\u30fc\u30b7\u30e7\u30f3\u306e\u5b9f\u884c\u72b6\u6cc1\u3092\u53ef\u8996\u5316\u3059\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002Spark UI\u3092\u6d3b\u7528\u3059\u308b\u3053\u3068\u3067\u3001\u30d1\u30d5\u30a9\u30fc\u30de\u30f3\u30b9\u306e\u554f\u984c\u3084\u30a8\u30e9\u30fc\u306e\u539f\u56e0\u3092\u7279\u5b9a\u3057\u3084\u3059\u304f\u306a\u308a\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u4ee5\u4e0a\u3001PySpark\u306e\u57fa\u672c\u7684\u306a\u4f7f\u3044\u65b9\u306b\u3064\u3044\u3066\u300110\u306e\u57fa\u790e\u77e5\u8b58\u3068Tips\u3092\u7d39\u4ecb\u3057\u307e\u3057\u305f\u3002\u3053\u308c\u3089\u3092\u7406\u89e3\u3059\u308b\u3053\u3068\u3067\u3001PySpark\u3092\u4f7f\u3063\u305f\u30c7\u30fc\u30bf\u51e6\u7406\u30a2\u30d7\u30ea\u30b1\u30fc\u30b7\u30e7\u30f3\u306e\u958b\u767a\u304c\u30b9\u30e0\u30fc\u30ba\u306b\u884c\u3048\u308b\u3088\u3046\u306b\u306a\u308b\u3067\u3057\u3087\u3046\u3002\u6b21\u7ae0\u3067\u306f\u3001PySpark\u3092\u4f7f\u3063\u305f\u5b9f\u8df5\u7684\u306a\u30e6\u30fc\u30b9\u30b1\u30fc\u30b9\u306b\u3064\u3044\u3066\u89e3\u8aac\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<h2 class=\"wp-block-heading\" 
id=\"i-19\">PySpark\u3092\u4f7f\u3063\u305f5\u3064\u306e\u5b9f\u8df5\u7684\u306a\u30e6\u30fc\u30b9\u30b1\u30fc\u30b9<\/h2>\n\n\n\n<p>PySpark\u306f\u3001\u5927\u898f\u6a21\u30c7\u30fc\u30bf\u51e6\u7406\u306b\u9069\u3057\u305f\u30d5\u30ec\u30fc\u30e0\u30ef\u30fc\u30af\u3067\u3042\u308a\u3001\u69d8\u3005\u306a\u5b9f\u8df5\u7684\u306a\u30e6\u30fc\u30b9\u30b1\u30fc\u30b9\u306b\u5fdc\u7528\u3059\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002\u3053\u3053\u3067\u306f\u3001PySpark\u3092\u4f7f\u3063\u305f5\u3064\u306e\u5b9f\u8df5\u7684\u306a\u30e6\u30fc\u30b9\u30b1\u30fc\u30b9\u3092\u7d39\u4ecb\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-20\">\u5927\u898f\u6a21\u30c7\u30fc\u30bf\u306eETL\u51e6\u7406<\/h3>\n\n\n\n<p>ETL\uff08Extract, Transform, Load\uff09\u306f\u3001\u30c7\u30fc\u30bf\u306e\u62bd\u51fa\u3001\u5909\u63db\u3001\u30ed\u30fc\u30c9\u3092\u884c\u3046\u30d7\u30ed\u30bb\u30b9\u3067\u3059\u3002PySpark\u3092\u4f7f\u3046\u3053\u3068\u3067\u3001\u5927\u898f\u6a21\u30c7\u30fc\u30bf\u306eETL\u51e6\u7406\u3092\u52b9\u7387\u7684\u306b\u884c\u3046\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<p>ETL\u51e6\u7406\u3067\u306f\u3001\u69d8\u3005\u306a\u30c7\u30fc\u30bf\u30bd\u30fc\u30b9\uff08CSV\u30d5\u30a1\u30a4\u30eb\u3001JSON\u30d5\u30a1\u30a4\u30eb\u3001\u30c7\u30fc\u30bf\u30d9\u30fc\u30b9\u306a\u3069\uff09\u304b\u3089\u30c7\u30fc\u30bf\u3092\u8aad\u307f\u8fbc\u307f\u3001\u5fc5\u8981\u306a\u5909\u63db\u3092\u884c\u3063\u305f\u5f8c\u3001\u52a0\u5de5\u3055\u308c\u305f\u30c7\u30fc\u30bf\u3092\u5225\u306e\u30c7\u30fc\u30bf\u30b9\u30c8\u30a2\u306b\u66f8\u304d\u8fbc\u307f\u307e\u3059\u3002PySpark\u306eDataFrame\u3092\u4f7f\u3046\u3053\u3068\u3067\u3001\u3053\u308c\u3089\u306e\u51e6\u7406\u3092\u7c21\u6f54\u306b\u8a18\u8ff0\u3059\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u307e\u305f\u3001ETL\u51e6\u7406\u3067\u306f\u30d1\u30d5\u30a9\u30fc\u30de\u30f3\u30b9\u304c\u91cd\u8981\u306b\u306a\u308a\u307e\u3059\u30
02PySpark\u3067\u306f\u3001\u30d1\u30fc\u30c6\u30a3\u30b7\u30e7\u30cb\u30f3\u30b0\u3084\u30ad\u30e3\u30c3\u30b7\u30e5\u306a\u3069\u306e\u6700\u9069\u5316\u624b\u6cd5\u3092\u7528\u3044\u308b\u3053\u3068\u3067\u3001\u51e6\u7406\u901f\u5ea6\u3092\u5927\u5e45\u306b\u5411\u4e0a\u3055\u305b\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<p>ETL\u51e6\u7406\u306e\u30b3\u30fc\u30c9\u4f8b\u3092\u4ee5\u4e0b\u306b\u793a\u3057\u307e\u3059\u3002\u3053\u3053\u3067\u306f\u3001CSV\u30d5\u30a1\u30a4\u30eb\u304b\u3089\u30c7\u30fc\u30bf\u3092\u8aad\u307f\u8fbc\u307f\u3001\u3044\u304f\u3064\u304b\u306e\u5909\u63db\u3092\u884c\u3063\u305f\u5f8c\u3001\u30d1\u30fc\u30b1\u30c3\u30c8\u30d5\u30a1\u30a4\u30eb\u3068\u3057\u3066\u30c7\u30fc\u30bf\u3092\u4fdd\u5b58\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">from pyspark.sql import SparkSession\nfrom pyspark.sql.functions import col, upper, regexp_replace\n\n# SparkSession\u306e\u4f5c\u6210\nspark = SparkSession.builder \\\n    .appName(\"ETL Example\") \\\n    .getOrCreate()\n\n# CSV\u30d5\u30a1\u30a4\u30eb\u304b\u3089\u30c7\u30fc\u30bf\u3092\u8aad\u307f\u8fbc\u3080\ndata = spark.read \\\n    .option(\"header\", \"true\") \\\n    .option(\"inferSchema\", \"true\") \\\n    .csv(\"input_data.csv\")\n\n# \u30c7\u30fc\u30bf\u306e\u5909\u63db\u51e6\u7406\ntransformed_data = data \\\n    .filter(col(\"age\") &gt; 18) \\\n    .withColumn(\"name\", upper(col(\"name\"))) \\\n    .withColumn(\"email\", regexp_replace(col(\"email\"), \"@.+\", \"@example.com\"))\n\n# \u30d1\u30fc\u30c6\u30a3\u30b7\u30e7\u30cb\u30f3\u30b0\u3068\u30ad\u30e3\u30c3\u30b7\u30e5\u306b\u3088\u308b\u6700\u9069\u5316\ntransformed_data \\\n    .repartition(10) \\\n    .cache()\n\n# 
\u5909\u63db\u5f8c\u306e\u30c7\u30fc\u30bf\u3092\u30d1\u30fc\u30b1\u30c3\u30c8\u30d5\u30a1\u30a4\u30eb\u3068\u3057\u3066\u4fdd\u5b58\ntransformed_data.write \\\n    .mode(\"overwrite\") \\\n    .parquet(\"output_data\")\n\n# SparkSession\u306e\u505c\u6b62\nspark.stop()<\/pre>\n\n\n\n<p>\u30b5\u30f3\u30d7\u30eb\u30c7\u30fc\u30bf\u306ecsv\u30d5\u30a1\u30a4\u30eb\u306f\u3053\u3061\u3089\u3067\u3059\u3002<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">name,age,email\nJohn,25,john@example.com\nAlice,30,alice@example.com\nBob,17,bob@example.com\nEmma,20,emma@example.com\nMichael,35,michael@example.com\nSophia,28,sophia@example.com\nWilliam,16,william@example.com\nOlivia,22,olivia@example.com\nJames,40,james@example.com\nAva,19,ava@example.com<\/pre>\n\n\n\n<p>\u3053\u306e\u30b3\u30fc\u30c9\u4f8b\u3067\u306f\u3001\u4ee5\u4e0b\u306e\u51e6\u7406\u3092\u884c\u3063\u3066\u3044\u307e\u3059\u3002<\/p>\n\n\n\n<ol 
class=\"wp-block-list\">\n<li><code>SparkSession<\/code>\u3092\u4f5c\u6210\u3057\u3001Spark\u30a2\u30d7\u30ea\u30b1\u30fc\u30b7\u30e7\u30f3\u3092\u521d\u671f\u5316\u3057\u307e\u3059\u3002<\/li>\n\n\n\n<li><code>spark.read<\/code>\u3092\u4f7f\u3063\u3066\u3001CSV\u30d5\u30a1\u30a4\u30eb\uff08<code>input_data.csv<\/code>\uff09\u304b\u3089\u30c7\u30fc\u30bf\u3092\u8aad\u307f\u8fbc\u307f\u307e\u3059\u3002<code>option<\/code>\u30e1\u30bd\u30c3\u30c9\u3067\u3001\u30d8\u30c3\u30c0\u30fc\u306e\u6709\u7121\u3084\u30b9\u30ad\u30fc\u30de\u306e\u63a8\u6e2c\u3092\u6307\u5b9a\u3057\u307e\u3059\u3002<\/li>\n\n\n\n<li><code>filter<\/code>\u3001<code>withColumn<\/code>\u3001<code>regexp_replace<\/code>\u306a\u3069\u306e\u95a2\u6570\u3092\u4f7f\u3063\u3066\u3001\u30c7\u30fc\u30bf\u306e\u5909\u63db\u51e6\u7406\u3092\u884c\u3044\u307e\u3059\u3002\u3053\u3053\u3067\u306f\u3001\u5e74\u9f62\u304c18\u6b73\u3092\u8d85\u3048\u308b\u30ec\u30b3\u30fc\u30c9\u3092\u62bd\u51fa\u3057\u3001\u540d\u524d\u3092\u5927\u6587\u5b57\u306b\u5909\u63db\u3057\u3001\u30e1\u30fc\u30eb\u30a2\u30c9\u30ec\u30b9\u306e\u30c9\u30e1\u30a4\u30f3\u90e8\u5206\u3092<code>@example.com<\/code>\u306b\u7f6e\u304d\u63db\u3048\u3066\u3044\u307e\u3059\u3002<\/li>\n\n\n\n<li><code>repartition<\/code>\u3068<code>cache<\/code>\u3092\u4f7f\u3063\u3066\u3001\u30c7\u30fc\u30bf\u309210\u500b\u306e\u30d1\u30fc\u30c6\u30a3\u30b7\u30e7\u30f3\u306b\u5206\u5272\u3057\u3001\u30ad\u30e3\u30c3\u30b7\u30e5\u3059\u308b\u3053\u3068\u3067\u3001\u4ee5\u964d\u306e\u51e6\u7406\u3092\u9ad8\u901f\u5316\u3057\u307e\u3059\u3002<\/li>\n\n\n\n<li><code>write<\/code>\u30e1\u30bd\u30c3\u30c9\u3092\u4f7f\u3063\u3066\u3001\u5909\u63db\u5f8c\u306e\u30c7\u30fc\u30bf\u3092\u30d1\u30fc\u30b1\u30c3\u30c8\u30d5\u30a1\u30a4\u30eb\u5f62\u5f0f\u3067<code>output_data<\/code>\u30c7\u30a3\u30ec\u30af\u30c8\u30ea\u306b\u4fdd\u5b58\u3057\u307e\u3059\u3002<\/li>\n\n\n\n<li>\u6700\u5f8c\u306b\u3001<code>spark.stop()<\/code>\u3067<code>SparkSession<\/code>\u3092\u505c\u6b62\u3057\u
3001\u30ea\u30bd\u30fc\u30b9\u3092\u89e3\u653e\u3057\u307e\u3059\u3002<\/li>\n<\/ol>\n\n\n\n<p>\u3053\u306e\u3088\u3046\u306bPySpark\u3092\u4f7f\u3046\u3053\u3068\u3067\u3001\u5927\u898f\u6a21\u30c7\u30fc\u30bf\u306eETL\u51e6\u7406\u3092\u7c21\u6f54\u304b\u3064\u52b9\u7387\u7684\u306b\u8a18\u8ff0\u3067\u304d\u307e\u3059\u3002\u30d1\u30fc\u30c6\u30a3\u30b7\u30e7\u30cb\u30f3\u30b0\u3084\u30ad\u30e3\u30c3\u30b7\u30e5\u306a\u3069\u306e\u6700\u9069\u5316\u624b\u6cd5\u3092\u9069\u5207\u306b\u4f7f\u3046\u3053\u3068\u3067\u3001\u51e6\u7406\u306e\u30d1\u30d5\u30a9\u30fc\u30de\u30f3\u30b9\u3092\u5927\u5e45\u306b\u5411\u4e0a\u3055\u305b\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-21\">\u30ed\u30b0\u30c7\u30fc\u30bf\u3092\u4f7f\u3063\u305f\u30e6\u30fc\u30b6\u30fc\u884c\u52d5\u5206\u6790<\/h3>\n\n\n\n<p>Web\u30b5\u30a4\u30c8\u3084\u30a2\u30d7\u30ea\u30b1\u30fc\u30b7\u30e7\u30f3\u306e\u30ed\u30b0\u30c7\u30fc\u30bf\u3092\u5206\u6790\u3059\u308b\u3053\u3068\u3067\u3001\u30e6\u30fc\u30b6\u30fc\u306e\u884c\u52d5\u3092\u7406\u89e3\u3057\u3001\u30b5\u30fc\u30d3\u30b9\u306e\u6539\u5584\u306b\u5f79\u7acb\u3066\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002PySpark\u3092\u4f7f\u3046\u3053\u3068\u3067\u3001\u5927\u91cf\u306e\u30ed\u30b0\u30c7\u30fc\u30bf\u3092\u52b9\u7387\u7684\u306b\u51e6\u7406\u3057\u3001\u30e6\u30fc\u30b6\u30fc\u884c\u52d5\u306e\u5206\u6790\u3092\u884c\u3046\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u30ed\u30b0\u30c7\u30fc\u30bf\u306e\u5206\u6790\u3067\u306f\u3001\u307e\u305a\u30ed\u30b0\u30c7\u30fc\u30bf\u3092\u8aad\u307f\u8fbc\u307f\u3001\u4e0d\u8981\u306a\u60c5\u5831\u3092\u53d6\u308a\u9664\u304f\u306a\u3069\u306e\u524d\u51e6\u7406\u3092\u884c\u3044\u307e\u3059\u3002\u6b21\u306b\u3001\u30bb\u30c3\u30b7\u30e7\u30f3\u5316\u3084\u30e6\u30fc\u30b6\u30fcID\u306e\u62bd\u51fa\u306a\u3069\u3092\u884c\u3044\u3001\u30e6\u30fc\u30b6\u30fc\u3054\u3068\u306e\u884c\u52d5\u3092\u96c6\u8a08\u3057
\u307e\u3059\u3002\u6700\u5f8c\u306b\u3001\u96c6\u8a08\u3055\u308c\u305f\u30c7\u30fc\u30bf\u3092\u53ef\u8996\u5316\u3059\u308b\u3053\u3068\u3067\u3001\u30e6\u30fc\u30b6\u30fc\u884c\u52d5\u306e\u50be\u5411\u3092\u628a\u63e1\u3059\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<p>PySpark\u3067\u306f\u3001\u30ed\u30b0\u30c7\u30fc\u30bf\u306e\u8aad\u307f\u8fbc\u307f\u306bDataFrame\u3092\u4f7f\u3044\u3001\u30bb\u30c3\u30b7\u30e7\u30f3\u5316\u306b\u306fWindowFunction\u3092\u4f7f\u3046\u306a\u3069\u3001\u5404\u51e6\u7406\u306b\u9069\u3057\u305fAPI\u3092\u9078\u629e\u3059\u308b\u3053\u3068\u3067\u3001\u52b9\u7387\u7684\u306a\u5b9f\u88c5\u304c\u53ef\u80fd\u3067\u3059\u3002<\/p>\n\n\n\n<p>\u30ed\u30b0\u30c7\u30fc\u30bf\u3092\u4f7f\u3063\u305f\u30e6\u30fc\u30b6\u30fc\u884c\u52d5\u5206\u6790\u306e\u30b3\u30fc\u30c9\u4f8b\u3092\u4ee5\u4e0b\u306b\u793a\u3057\u307e\u3059\u3002\u3053\u3053\u3067\u306f\u3001\u30ed\u30b0\u30c7\u30fc\u30bf\u3092JSON\u30d5\u30a1\u30a4\u30eb\u304b\u3089\u8aad\u307f\u8fbc\u307f\u3001\u30bb\u30c3\u30b7\u30e7\u30f3\u5316\u3092\u884c\u3044\u3001\u30e6\u30fc\u30b6\u30fc\u3054\u3068\u306e\u30bb\u30c3\u30b7\u30e7\u30f3\u6570\u3068\u5e73\u5747\u30bb\u30c3\u30b7\u30e7\u30f3\u6642\u9593\u3092\u8a08\u7b97\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">from pyspark.sql import SparkSession\nfrom pyspark.sql.functions import col, from_unixtime, session_window, count, avg\nfrom pyspark.sql.types import StructType, StructField, StringType, LongType\n\n# SparkSession\u306e\u4f5c\u6210\nspark = SparkSession.builder \\\n    .appName(\"User Behavior Analysis\") \\\n    .getOrCreate()\n\n# \u30ed\u30b0\u30c7\u30fc\u30bf\u306e\u30b9\u30ad\u30fc\u30de\u5b9a\u7fa9\nlog_schema = StructType([\n    StructField(\"user_id\", StringType(), 
True),\n    StructField(\"timestamp\", LongType(), True),\n    StructField(\"action\", StringType(), True)\n])\n\n# JSON\u30d5\u30a1\u30a4\u30eb\u304b\u3089\u30ed\u30b0\u30c7\u30fc\u30bf\u3092\u8aad\u307f\u8fbc\u3080\nlogs = spark.read \\\n    .schema(log_schema) \\\n    .json(\"user_logs.json\")\n\n# \u30bf\u30a4\u30e0\u30b9\u30bf\u30f3\u30d7\u3092UNIX\u6642\u9593\u304b\u3089\u30bf\u30a4\u30e0\u30b9\u30bf\u30f3\u30d7\u578b\u306b\u5909\u63db\nlogs = logs.withColumn(\"timestamp\", from_unixtime(col(\"timestamp\")))\n\n# \u30bb\u30c3\u30b7\u30e7\u30f3\u5316\u306e\u305f\u3081\u306e\u30a6\u30a3\u30f3\u30c9\u30a6\u5b9a\u7fa9\nsession_window_spec = session_window(logs.timestamp, \"30 minutes\")\n\n# \u30bb\u30c3\u30b7\u30e7\u30f3\u5316\u3068\u30bb\u30c3\u30b7\u30e7\u30f3\u6570\u30fb\u5e73\u5747\u30bb\u30c3\u30b7\u30e7\u30f3\u6642\u9593\u306e\u8a08\u7b97\nuser_sessions = logs \\\n    .groupBy(col(\"user_id\"), session_window_spec.alias(\"session\")) \\\n    .agg(count(\"*\").alias(\"actions_per_session\")) \\\n    .groupBy(\"user_id\") \\\n    .agg(\n        count(\"*\").alias(\"num_sessions\"),\n        avg(\"actions_per_session\").alias(\"avg_actions_per_session\"),\n        avg(col(\"session\").end.cast(\"long\") - col(\"session\").start.cast(\"long\")).alias(\"avg_session_duration\")\n    )\n\n# \u7d50\u679c\u306e\u8868\u793a\nuser_sessions.show(10, False)\n\n# SparkSession\u306e\u505c\u6b62\nspark.stop()<\/pre>\n\n\n\n<p>\u3053\u306e\u30b3\u30fc\u30c9\u4f8b\u3067\u306f\u3001\u4ee5\u4e0b\u306e\u51e6\u7406\u3092\u884c\u3063\u3066\u3044\u307e\u3059\u3002<\/p>\n\n\n\n<ol 
class=\"wp-block-list\">\n<li><code>SparkSession<\/code>\u3092\u4f5c\u6210\u3057\u3001Spark\u30a2\u30d7\u30ea\u30b1\u30fc\u30b7\u30e7\u30f3\u3092\u521d\u671f\u5316\u3057\u307e\u3059\u3002<\/li>\n\n\n\n<li>\u30ed\u30b0\u30c7\u30fc\u30bf\u306e\u30b9\u30ad\u30fc\u30de\u3092<code>StructType<\/code>\u3068<code>StructField<\/code>\u3092\u4f7f\u3063\u3066\u5b9a\u7fa9\u3057\u307e\u3059\u3002\u3053\u3053\u3067\u306f\u3001\u30e6\u30fc\u30b6\u30fcID\u3001\u30bf\u30a4\u30e0\u30b9\u30bf\u30f3\u30d7\u3001\u30a2\u30af\u30b7\u30e7\u30f3\u306e3\u3064\u306e\u30d5\u30a3\u30fc\u30eb\u30c9\u3092\u6301\u3064\u30b9\u30ad\u30fc\u30de\u3092\u5b9a\u7fa9\u3057\u3066\u3044\u307e\u3059\u3002<\/li>\n\n\n\n<li><code>spark.read<\/code>\u3092\u4f7f\u3063\u3066\u3001JSON\u30d5\u30a1\u30a4\u30eb\uff08<code>user_logs.json<\/code>\uff09\u304b\u3089\u30ed\u30b0\u30c7\u30fc\u30bf\u3092\u8aad\u307f\u8fbc\u307f\u307e\u3059\u3002<code>schema<\/code>\u30e1\u30bd\u30c3\u30c9\u3067\u3001\u5148\u306b\u5b9a\u7fa9\u3057\u305f\u30b9\u30ad\u30fc\u30de\u3092\u6307\u5b9a\u3057\u307e\u3059\u3002<\/li>\n\n\n\n<li><code>withColumn<\/code>\u3068<code>from_unixtime<\/code>\u3092\u4f7f\u3063\u3066\u3001UNIX\u6642\u9593\u5f62\u5f0f\u306e\u30bf\u30a4\u30e0\u30b9\u30bf\u30f3\u30d7\u3092\u30bf\u30a4\u30e0\u30b9\u30bf\u30f3\u30d7\u578b\u306b\u5909\u63db\u3057\u307e\u3059\u3002<\/li>\n\n\n\n<li><code>session_window<\/code>\u95a2\u6570\u3092\u4f7f\u3063\u3066\u3001\u30bb\u30c3\u30b7\u30e7\u30f3\u5316\u306e\u305f\u3081\u306e\u30a6\u30a3\u30f3\u30c9\u30a6\u3092\u5b9a\u7fa9\u3057\u307e\u3059\u3002\u3053\u3053\u3067\u306f\u300130\u5206\u306e\u30bb\u30c3\u30b7\u30e7\u30f3\u30a6\u30a3\u30f3\u30c9\u30a6\u3092\u8a2d\u5b9a\u3057\u3066\u3044\u307e\u3059\u3002<\/li>\n\n\n\n<li><code>groupBy<\/code>\u3068<code>agg<\/code>\u3092\u4f7f\u3063\u3066\u3001\u30e6\u30fc\u30b6\u30fcID\u3068\u30bb\u30c3\u30b7\u30e7\u30f3\u30a6\u30a3\u30f3\u30c9\u30a6\u3067\u30b0\u30eb\u30fc\u30d7\u5316\u3057\u3001\u30bb\u30c3\u30b7\u30e7\u30f3\u3054\u3068\u306e\u30a2
\u30af\u30b7\u30e7\u30f3\u6570\u3092\u8a08\u7b97\u3057\u307e\u3059\u3002<\/li>\n\n\n\n<li>\u3055\u3089\u306b<code>groupBy<\/code>\u3068<code>agg<\/code>\u3092\u4f7f\u3063\u3066\u3001\u30e6\u30fc\u30b6\u30fcID\u3067\u30b0\u30eb\u30fc\u30d7\u5316\u3057\u3001\u30bb\u30c3\u30b7\u30e7\u30f3\u6570\u3001\u5e73\u5747\u30a2\u30af\u30b7\u30e7\u30f3\u6570\u3001\u5e73\u5747\u30bb\u30c3\u30b7\u30e7\u30f3\u6642\u9593\u3092\u8a08\u7b97\u3057\u307e\u3059\u3002<\/li>\n\n\n\n<li><code>show<\/code>\u30e1\u30bd\u30c3\u30c9\u3092\u4f7f\u3063\u3066\u3001\u7d50\u679c\u3092\u8868\u793a\u3057\u307e\u3059\u3002<\/li>\n\n\n\n<li>\u6700\u5f8c\u306b\u3001<code>spark.stop()<\/code>\u3067<code>SparkSession<\/code>\u3092\u505c\u6b62\u3057\u3001\u30ea\u30bd\u30fc\u30b9\u3092\u89e3\u653e\u3057\u307e\u3059\u3002<\/li>\n<\/ol>\n\n\n\n<p>\u3053\u306e\u3088\u3046\u306bPySpark\u3092\u4f7f\u3046\u3053\u3068\u3067\u3001\u5927\u91cf\u306e\u30ed\u30b0\u30c7\u30fc\u30bf\u304b\u3089\u30e6\u30fc\u30b6\u30fc\u3054\u3068\u306e\u30bb\u30c3\u30b7\u30e7\u30f3\u60c5\u5831\u3092\u52b9\u7387\u7684\u306b\u62bd\u51fa\u3057\u3001\u30e6\u30fc\u30b6\u30fc\u884c\u52d5\u306e\u50be\u5411\u3092\u5206\u6790\u3059\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002WindowFunction\u3092\u4f7f\u3063\u305f\u30bb\u30c3\u30b7\u30e7\u30f3\u5316\u3084\u3001DataFrame\u306e\u30b0\u30eb\u30fc\u30d7\u5316\u3068\u96c6\u8a08\u64cd\u4f5c\u3092\u7d44\u307f\u5408\u308f\u305b\u308b\u3053\u3068\u3067\u3001\u30ed\u30b0\u30c7\u30fc\u30bf\u306e\u5206\u6790\u3092\u7c21\u6f54\u306b\u8868\u73fe\u3067\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u3053\u306e\u30b3\u30fc\u30c9\u306e\u8a66\u904b\u8ee2\u306b\u306f\u4e0b\u306e\u30b3\u30fc\u30c9\u3067\u751f\u6210\u3067\u304d\u308bjson\u30d5\u30a1\u30a4\u30eb\u3092\u7528\u3044\u308b\u3068\u826f\u3044\u3067\u3057\u3087\u3046\u3002<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" 
data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">import json\nimport random\nimport datetime\n\n# \u30ed\u30b0\u30c7\u30fc\u30bf\u306e\u8a2d\u5b9a\nnum_users = 100\nnum_logs_per_user = 1000\nstart_date = datetime.datetime(2023, 1, 1)\nend_date = datetime.datetime(2023, 12, 31)\n\n# \u30e6\u30fc\u30b6\u30fcID\u3068\u30a2\u30af\u30b7\u30e7\u30f3\u306e\u30ea\u30b9\u30c8\nuser_ids = [f\"user{i}\" for i in range(1, num_users + 1)]\nactions = [\"login\", \"logout\", \"search\", \"view_product\", \"add_to_cart\", \"purchase\"]\n\n# \u30ed\u30b0\u30c7\u30fc\u30bf\u3092\u751f\u6210\u3057\u3066JSON\u30d5\u30a1\u30a4\u30eb\u306b\u51fa\u529b\nwith open(\"user_logs.json\", \"w\") as f:\n    for user_id in user_ids:\n        for _ in range(num_logs_per_user):\n            # \u30e9\u30f3\u30c0\u30e0\u306a\u65e5\u6642\u3092\u751f\u6210\n            timestamp = int(random.uniform(start_date.timestamp(), end_date.timestamp()))\n            \n            # \u30e9\u30f3\u30c0\u30e0\u306a\u30a2\u30af\u30b7\u30e7\u30f3\u3092\u9078\u629e\n            action = random.choice(actions)\n            \n            # \u30ed\u30b0\u30c7\u30fc\u30bf\u3092\u8f9e\u66f8\u3068\u3057\u3066\u4f5c\u6210\n            log_data = {\n                \"user_id\": user_id,\n                \"timestamp\": timestamp,\n                \"action\": action\n            }\n            \n            # \u30ed\u30b0\u30c7\u30fc\u30bf\u3092JSON\u3068\u3057\u3066\u66f8\u304d\u8fbc\u307f\n            f.write(json.dumps(log_data) + \"\\n\")\n\nprint(\"JSON\u30d5\u30a1\u30a4\u30eb\u306e\u751f\u6210\u304c\u5b8c\u4e86\u3057\u307e\u3057\u305f\u3002\")<\/pre>\n\n\n\n<h3 class=\"wp-block-heading\" 
id=\"i-22\">\u6a5f\u68b0\u5b66\u7fd2\u3092\u4f7f\u3063\u305f\u5546\u54c1\u30ec\u30b3\u30e1\u30f3\u30c9<\/h3>\n\n\n\n<p>\u5354\u8abf\u30d5\u30a3\u30eb\u30bf\u30ea\u30f3\u30b0\u306f\u3001\u30e6\u30fc\u30b6\u30fc\u306e\u904e\u53bb\u306e\u884c\u52d5\u5c65\u6b74\u306b\u57fa\u3065\u3044\u3066\u3001\u30e6\u30fc\u30b6\u30fc\u304c\u8208\u5473\u3092\u6301\u3061\u305d\u3046\u306a\u5546\u54c1\u3092\u63a8\u85a6\u3059\u308b\u624b\u6cd5\u3067\u3059\u3002PySpark\u306eMLlib\u3092\u4f7f\u3046\u3053\u3068\u3067\u3001\u5927\u898f\u6a21\u306a\u5546\u54c1\u30ec\u30b3\u30e1\u30f3\u30c9\u30b7\u30b9\u30c6\u30e0\u3092\u69cb\u7bc9\u3059\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u5546\u54c1\u30ec\u30b3\u30e1\u30f3\u30c9\u3067\u306f\u3001\u30e6\u30fc\u30b6\u30fc\u3068\u5546\u54c1\u306e\u76f8\u4e92\u4f5c\u7528\uff08\u8cfc\u5165\u5c65\u6b74\u3001\u8a55\u4fa1\u5c65\u6b74\u306a\u3069\uff09\u3092\u30c7\u30fc\u30bf\u3068\u3057\u3066\u4f7f\u7528\u3057\u307e\u3059\u3002PySpark\u3067\u306f\u3001\u3053\u308c\u3089\u306e\u30c7\u30fc\u30bf\u3092\u8aad\u307f\u8fbc\u307f\u3001\u7279\u5fb4\u91cf\u306e\u62bd\u51fa\u3084\u6b20\u640d\u5024\u306e\u51e6\u7406\u306a\u3069\u306e\u524d\u51e6\u7406\u3092\u884c\u3044\u307e\u3059\u3002\u6b21\u306b\u3001ALS\uff08Alternating Least 
Squares\uff09\u306a\u3069\u306e\u5354\u8abf\u30d5\u30a3\u30eb\u30bf\u30ea\u30f3\u30b0\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0\u3092\u7528\u3044\u3066\u3001\u30e6\u30fc\u30b6\u30fc\u3068\u5546\u54c1\u306e\u6f5c\u5728\u7684\u306a\u7279\u5fb4\u3092\u5b66\u7fd2\u3057\u307e\u3059\u3002\u6700\u5f8c\u306b\u3001\u5b66\u7fd2\u3055\u308c\u305f\u30e2\u30c7\u30eb\u3092\u4f7f\u3063\u3066\u3001\u30e6\u30fc\u30b6\u30fc\u3054\u3068\u306b\u500b\u5225\u306e\u5546\u54c1\u30ec\u30b3\u30e1\u30f3\u30c9\u3092\u751f\u6210\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<p>MLlib\u3067\u306f\u3001ALS\u3092\u306f\u3058\u3081\u3068\u3059\u308b\u69d8\u3005\u306a\u5354\u8abf\u30d5\u30a3\u30eb\u30bf\u30ea\u30f3\u30b0\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0\u304c\u63d0\u4f9b\u3055\u308c\u3066\u304a\u308a\u3001\u7c21\u5358\u306b\u5546\u54c1\u30ec\u30b3\u30e1\u30f3\u30c9\u30b7\u30b9\u30c6\u30e0\u3092\u69cb\u7bc9\u3059\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<p>PySpark\u306eMLlib\u3092\u4f7f\u3063\u305f\u5546\u54c1\u30ec\u30b3\u30e1\u30f3\u30c9\u306e\u30b3\u30fc\u30c9\u4f8b\u3092\u4ee5\u4e0b\u306b\u793a\u3057\u307e\u3059\u3002\u3053\u306e\u30b3\u30fc\u30c9\u4f8b\u3067\u306f\u3001ALS\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0\u3092\u4f7f\u3063\u3066\u30e6\u30fc\u30b6\u30fc\u3068\u5546\u54c1\u306e\u6f5c\u5728\u7684\u306a\u7279\u5fb4\u3092\u5b66\u7fd2\u3057\u3001\u30e6\u30fc\u30b6\u30fc\u3054\u3068\u306b\u304a\u3059\u3059\u3081\u306e\u5546\u54c1\u3092\u63a8\u85a6\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">from pyspark.sql import SparkSession\nfrom pyspark.ml.recommendation import ALS\nfrom pyspark.sql.functions import col, explode\n\n# SparkSession\u306e\u4f5c\u6210\nspark = SparkSession.builder \\\n    .appName(\"Product Recommendation\") \\\n    .getOrCreate()\n\n# 
\u30b5\u30f3\u30d7\u30eb\u30c7\u30fc\u30bf\u306e\u4f5c\u6210\ndata = spark.createDataFrame([\n    (0, 0, 4.0), (0, 1, 3.0), (0, 2, 5.0),\n    (1, 0, 5.0), (1, 2, 4.0),\n    (2, 1, 2.0), (2, 2, 3.0),\n    (3, 0, 2.0), (3, 1, 4.0)\n], [\"userId\", \"productId\", \"rating\"])\n\n# \u30c7\u30fc\u30bf\u306e\u5206\u5272\uff08\u30c8\u30ec\u30fc\u30cb\u30f3\u30b0\u30bb\u30c3\u30c8\u3068\u30c6\u30b9\u30c8\u30bb\u30c3\u30c8\uff09\n(training, test) = data.randomSplit([0.8, 0.2])\n\n# ALS\u30e2\u30c7\u30eb\u306e\u4f5c\u6210\nals = ALS(maxIter=10, regParam=0.01, userCol=\"userId\", itemCol=\"productId\", ratingCol=\"rating\",\n          coldStartStrategy=\"drop\")\nmodel = als.fit(training)\n\n# \u5168\u30e6\u30fc\u30b6\u30fc\u306b\u5bfe\u3057\u3066\u5546\u54c1\u3092\u63a8\u85a6\nuser_recs = model.recommendForAllUsers(5)\n\n# \u63a8\u85a6\u7d50\u679c\u306e\u8868\u793a\nuser_recs = user_recs \\\n    .withColumn(\"recommendations\", explode(\"recommendations\")) \\\n    .select(col(\"userId\"), col(\"recommendations.productId\").alias(\"productId\"),\n            col(\"recommendations.rating\").alias(\"rating\"))\n\nuser_recs.show(truncate=False)\n\n# \u8a55\u4fa1\u6307\u6a19\uff08RMSE\uff09\u306e\u8a08\u7b97\npredictions = model.transform(test)\nevaluator = RegressionEvaluator(metricName=\"rmse\", labelCol=\"rating\", predictionCol=\"prediction\")\nrmse = evaluator.evaluate(predictions)\nprint(f\"Root-mean-square error = {rmse}\")\n\n# SparkSession\u306e\u505c\u6b62\nspark.stop()<\/pre>\n\n\n\n<p>\u3053\u306e\u30b3\u30fc\u30c9\u4f8b\u3067\u306f\u3001\u4ee5\u4e0b\u306e\u51e6\u7406\u3092\u884c\u3063\u3066\u3044\u307e\u3059\u3002<\/p>\n\n\n\n<ol 
class=\"wp-block-list\">\n<li><code>SparkSession<\/code>\u3092\u4f5c\u6210\u3057\u3001Spark\u30a2\u30d7\u30ea\u30b1\u30fc\u30b7\u30e7\u30f3\u3092\u521d\u671f\u5316\u3057\u307e\u3059\u3002<\/li>\n\n\n\n<li>\u30b5\u30f3\u30d7\u30eb\u30c7\u30fc\u30bf\u3092\u4f5c\u6210\u3057\u307e\u3059\u3002\u3053\u3053\u3067\u306f\u3001\u30e6\u30fc\u30b6\u30fcID\u3001\u5546\u54c1ID\u3001\u8a55\u4fa1\u5024\u304b\u3089\u306a\u308b\u30c7\u30fc\u30bf\u3092<code>createDataFrame<\/code>\u30e1\u30bd\u30c3\u30c9\u3067\u4f5c\u6210\u3057\u3066\u3044\u307e\u3059\u3002<\/li>\n\n\n\n<li><code>randomSplit<\/code>\u30e1\u30bd\u30c3\u30c9\u3092\u4f7f\u3063\u3066\u3001\u30c7\u30fc\u30bf\u3092\u30c8\u30ec\u30fc\u30cb\u30f3\u30b0\u30bb\u30c3\u30c8\u3068\u30c6\u30b9\u30c8\u30bb\u30c3\u30c8\u306b\u5206\u5272\u3057\u307e\u3059\u3002<\/li>\n\n\n\n<li><code>ALS<\/code>\u30af\u30e9\u30b9\u3092\u4f7f\u3063\u3066\u3001ALS\u30e2\u30c7\u30eb\u3092\u4f5c\u6210\u3057\u307e\u3059\u3002\u3053\u3053\u3067\u306f\u3001\u6700\u5927\u53cd\u5fa9\u56de\u6570\uff08<code>maxIter<\/code>\uff09\u3001\u6b63\u5247\u5316\u30d1\u30e9\u30e1\u30fc\u30bf\uff08<code>regParam<\/code>\uff09\u3001\u30e6\u30fc\u30b6\u30fc\u5217\uff08<code>userCol<\/code>\uff09\u3001\u30a2\u30a4\u30c6\u30e0\u5217\uff08<code>itemCol<\/code>\uff09\u3001\u8a55\u4fa1\u5217\uff08<code>ratingCol<\/code>\uff09\u3001\u30b3\u30fc\u30eb\u30c9\u30b9\u30bf\u30fc\u30c8\u6226\u7565\uff08<code>coldStartStrategy<\/code>\uff09\u3092\u6307\u5b9a\u3057\u3066\u3044\u307e\u3059\u3002<\/li>\n\n\n\n<li><code>fit<\/code>\u30e1\u30bd\u30c3\u30c9\u3092\u4f7f\u3063\u3066\u3001\u30c8\u30ec\u30fc\u30cb\u30f3\u30b0\u30c7\u30fc\u30bf\u3067\u30e2\u30c7\u30eb\u3092\u5b66\u7fd2\u3057\u307e\u3059\u3002<\/li>\n\n\n\n<li><code>recommendForAllUsers<\/code>\u30e1\u30bd\u30c3\u30c9\u3092\u4f7f\u3063\u3066\u3001\u5168\u30e6\u30fc\u30b6\u30fc\u306b\u5bfe\u3057\u3066\u5546\u54c1\u3092\u63a8\u85a6\u3057\u307e\u3059\u3002\u3053\u3053\u3067\u306f\u3001\u5404\u30e6\u30fc\u30b6\u30fc\u306b\u5bfe\u3057\u3
066\u4e0a\u4f4d5\u3064\u306e\u5546\u54c1\u3092\u63a8\u85a6\u3057\u3066\u3044\u307e\u3059\u3002<\/li>\n\n\n\n<li>\u63a8\u85a6\u7d50\u679c\u3092\u8868\u793a\u3057\u307e\u3059\u3002<code>explode<\/code>\u95a2\u6570\u3092\u4f7f\u3063\u3066\u3001\u63a8\u85a6\u7d50\u679c\u3092\u5c55\u958b\u3057\u3001\u30e6\u30fc\u30b6\u30fcID\u3001\u5546\u54c1ID\u3001\u63a8\u85a6\u30b9\u30b3\u30a2\u3092\u8868\u793a\u3057\u307e\u3059\u3002<\/li>\n\n\n\n<li><code>transform<\/code>\u30e1\u30bd\u30c3\u30c9\u3092\u4f7f\u3063\u3066\u3001\u30c6\u30b9\u30c8\u30c7\u30fc\u30bf\u306b\u5bfe\u3057\u3066\u30e2\u30c7\u30eb\u3092\u9069\u7528\u3057\u3001\u4e88\u6e2c\u5024\u3092\u53d6\u5f97\u3057\u307e\u3059\u3002<\/li>\n\n\n\n<li><code>RegressionEvaluator<\/code>\u3092\u4f7f\u3063\u3066\u3001\u8a55\u4fa1\u6307\u6a19\uff08RMSE\uff09\u3092\u8a08\u7b97\u3057\u307e\u3059\u3002<\/li>\n\n\n\n<li>\u6700\u5f8c\u306b\u3001<code>spark.stop()<\/code>\u3067<code>SparkSession<\/code>\u3092\u505c\u6b62\u3057\u3001\u30ea\u30bd\u30fc\u30b9\u3092\u89e3\u653e\u3057\u307e\u3059\u3002<\/li>\n<\/ol>\n\n\n\n<p>\u3053\u306e\u30b3\u30fc\u30c9\u4f8b\u3092\u5b9f\u884c\u3059\u308b\u3068\u3001\u5168\u30e6\u30fc\u30b6\u30fc\u306b\u5bfe\u3059\u308b\u5546\u54c1\u63a8\u85a6\u7d50\u679c\u304c\u8868\u793a\u3055\u308c\u3001\u30e2\u30c7\u30eb\u306e\u8a55\u4fa1\u6307\u6a19\uff08RMSE\uff09\u304c\u51fa\u529b\u3055\u308c\u307e\u3059\u3002\u5b9f\u969b\u306e\u30c7\u30fc\u30bf\u3092\u4f7f\u3046\u5834\u5408\u306f\u3001\u30c7\u30fc\u30bf\u306e\u8aad\u307f\u8fbc\u307f\u90e8\u5206\u3092\u9069\u5b9c\u5909\u66f4\u3057\u3001\u5fc5\u8981\u306b\u5fdc\u3058\u3066\u30e2\u30c7\u30eb\u306e\u30d1\u30e9\u30e1\u30fc\u30bf\u3092\u8abf\u6574\u3057\u3066\u304f\u3060\u3055\u3044\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" 
id=\"i-23\">\u30b9\u30c8\u30ea\u30fc\u30df\u30f3\u30b0\u30c7\u30fc\u30bf\u3092\u4f7f\u3063\u305f\u30ea\u30a2\u30eb\u30bf\u30a4\u30e0\u7570\u5e38\u691c\u77e5<\/h3>\n\n\n\n<p>IoT\u30c7\u30d0\u30a4\u30b9\u3084\u30bb\u30f3\u30b5\u30fc\u304b\u3089\u9001\u4fe1\u3055\u308c\u308b\u30b9\u30c8\u30ea\u30fc\u30df\u30f3\u30b0\u30c7\u30fc\u30bf\u3092\u5206\u6790\u3059\u308b\u3053\u3068\u3067\u3001\u30ea\u30a2\u30eb\u30bf\u30a4\u30e0\u306b\u7570\u5e38\u3092\u691c\u77e5\u3059\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002PySpark\u306eStructured Streaming\u3092\u4f7f\u3046\u3053\u3068\u3067\u3001\u30b9\u30c8\u30ea\u30fc\u30df\u30f3\u30b0\u30c7\u30fc\u30bf\u306e\u51e6\u7406\u3092\u7c21\u5358\u306b\u884c\u3046\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u7570\u5e38\u691c\u77e5\u3067\u306f\u3001\u307e\u305a\u30b9\u30c8\u30ea\u30fc\u30df\u30f3\u30b0\u30c7\u30fc\u30bf\u3092\u8aad\u307f\u8fbc\u307f\u3001\u30c7\u30fc\u30bf\u306e\u524d\u51e6\u7406\u3092\u884c\u3044\u307e\u3059\u3002\u6b21\u306b\u3001\u79fb\u52d5\u5e73\u5747\u3084\u95be\u5024\u306a\u3069\u306e\u7570\u5e38\u691c\u77e5\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0\u3092\u9069\u7528\u3057\u3001\u7570\u5e38\u304c\u691c\u77e5\u3055\u308c\u305f\u5834\u5408\u306b\u30a2\u30e9\u30fc\u30c8\u3092\u767a\u5831\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<p>Structured Streaming\u3067\u306f\u3001DataFrame\u3092\u4f7f\u3063\u3066\u30b9\u30c8\u30ea\u30fc\u30df\u30f3\u30b0\u30c7\u30fc\u30bf\u3092\u51e6\u7406\u3059\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002\u307e\u305f\u3001\u69d8\u3005\u306a\u7570\u5e38\u691c\u77e5\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0\u3092Python\u3067\u5b9f\u88c5\u3057\u3001Structured Streaming\u3068\u7d44\u307f\u5408\u308f\u305b\u308b\u3053\u3068\u3067\u3001\u30ea\u30a2\u30eb\u30bf\u30a4\u30e0\u306a\u7570\u5e38\u691c\u77e5\u30b7\u30b9\u30c6\u30e0\u3092\u69cb\u7bc9\u3059\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<p>PySpark\u306eStructured 
Streaming\u3092\u4f7f\u3063\u305f\u30ea\u30a2\u30eb\u30bf\u30a4\u30e0\u7570\u5e38\u691c\u77e5\u306e\u30b3\u30fc\u30c9\u4f8b\u3092\u4ee5\u4e0b\u306b\u793a\u3057\u307e\u3059\u3002\u3053\u306e\u30b3\u30fc\u30c9\u4f8b\u3067\u306f\u3001\u30b9\u30c8\u30ea\u30fc\u30df\u30f3\u30b0\u30c7\u30fc\u30bf\u306e\u79fb\u52d5\u5e73\u5747\u3092\u8a08\u7b97\u3057\u3001\u95be\u5024\u3092\u8d85\u3048\u308b\u5024\u3092\u7570\u5e38\u3068\u3057\u3066\u691c\u77e5\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">from pyspark.sql import SparkSession\nfrom pyspark.sql.functions import from_json, col, avg, window\n\n# SparkSession\u306e\u4f5c\u6210\nspark = SparkSession.builder \\\n    .appName(\"Real-time Anomaly Detection\") \\\n    .getOrCreate()\n\n# \u30b9\u30c8\u30ea\u30fc\u30df\u30f3\u30b0\u30c7\u30fc\u30bf\u306e\u30b9\u30ad\u30fc\u30de\u5b9a\u7fa9\nschema = \"timestamp LONG, value DOUBLE\"\n\n# \u30b9\u30c8\u30ea\u30fc\u30df\u30f3\u30b0\u30c7\u30fc\u30bf\u306e\u8aad\u307f\u8fbc\u307f\uff08\u3053\u3053\u3067\u306fSocketTextStream\u3092\u4f7f\u7528\uff09\nstreaming_data = spark \\\n    .readStream \\\n    .format(\"socket\") \\\n    .option(\"host\", \"localhost\") \\\n    .option(\"port\", 9999) \\\n    .schema(schema) \\\n    .load()\n\n# \u79fb\u52d5\u5e73\u5747\u306e\u8a08\u7b97\nmoving_avg = streaming_data \\\n    .withWatermark(\"timestamp\", \"1 minute\") \\\n    .groupBy(window(\"timestamp\", \"1 minute\", \"30 seconds\")) \\\n    .agg(avg(\"value\").alias(\"moving_avg\"))\n\n# \u7570\u5e38\u691c\u77e5\u306e\u305f\u3081\u306eUDF\ndef detect_anomaly(value, moving_avg):\n    threshold = 1.5\n    return value &gt; moving_avg * threshold\n\n# \u7570\u5e38\u691c\u77e5\u306e\u9069\u7528\nanomalies = streaming_data \\\n    .join(moving_avg, 
on=[streaming_data.timestamp.cast(\"long\") &gt;= moving_avg.window.start,\n                           streaming_data.timestamp.cast(\"long\") &lt; moving_avg.window.end],\n          how=\"inner\") \\\n    .where(detect_anomaly(streaming_data.value, moving_avg.moving_avg)) \\\n    .select(streaming_data.timestamp, streaming_data.value, moving_avg.moving_avg)\n\n# \u7570\u5e38\u691c\u77e5\u7d50\u679c\u306e\u51fa\u529b\nquery = anomalies \\\n    .writeStream \\\n    .outputMode(\"append\") \\\n    .format(\"console\") \\\n    .start()\n\nquery.awaitTermination()<\/pre>\n\n\n\n<p>\u3053\u306e\u30b3\u30fc\u30c9\u4f8b\u3067\u306f\u3001\u4ee5\u4e0b\u306e\u51e6\u7406\u3092\u884c\u3063\u3066\u3044\u307e\u3059\u3002<\/p>\n\n\n\n<ol class=\"wp-block-list\">\n<li><code>SparkSession<\/code>\u3092\u4f5c\u6210\u3057\u3001Spark\u30a2\u30d7\u30ea\u30b1\u30fc\u30b7\u30e7\u30f3\u3092\u521d\u671f\u5316\u3057\u307e\u3059\u3002<\/li>\n\n\n\n<li>\u30b9\u30c8\u30ea\u30fc\u30df\u30f3\u30b0\u30c7\u30fc\u30bf\u306e\u30b9\u30ad\u30fc\u30de\u3092\u5b9a\u7fa9\u3057\u307e\u3059\u3002\u3053\u3053\u3067\u306f\u3001\u30bf\u30a4\u30e0\u30b9\u30bf\u30f3\u30d7\u3068\u5024\u306e2\u3064\u306e\u30d5\u30a3\u30fc\u30eb\u30c9\u3092\u6301\u3064\u30b9\u30ad\u30fc\u30de\u3092\u5b9a\u7fa9\u3057\u3066\u3044\u307e\u3059\u3002<\/li>\n\n\n\n<li><code>readStream<\/code>\u30e1\u30bd\u30c3\u30c9\u3092\u4f7f\u3063\u3066\u3001\u30b9\u30c8\u30ea\u30fc\u30df\u30f3\u30b0\u30c7\u30fc\u30bf\u3092\u8aad\u307f\u8fbc\u307f\u307e\u3059\u3002\u3053\u3053\u3067\u306f\u3001<code>SocketTextStream<\/code>\u3092\u4f7f\u7528\u3057\u3001\u30ed\u30fc\u30ab\u30eb\u30db\u30b9\u30c8\u306e\u6307\u5b9a\u3057\u305f\u30dd\u30fc\u30c8\u304b\u3089\u30c7\u30fc\u30bf\u3092\u53d7\u4fe1\u3057\u307e\u3059\u3002<\/li>\n\n\n\n<li><code>withWatermark<\/code>\u3068<code>window<\/code>\u95a2\u6570\u3092\u4f7f\u3063\u3066\u3001\u79fb\u52d5\u5e73\u5747\u3092\u8a08\u7b97\u3057\u307e\u3059\u3002\u3053\u3053\u3067\u306f\u30011\u5206\u9593\u306e\u30a6\u30a3\u30f
3\u30c9\u30a6\u30b5\u30a4\u30ba\u306830\u79d2\u306e\u30b9\u30e9\u30a4\u30c9\u30a4\u30f3\u30bf\u30fc\u30d0\u30eb\u3092\u8a2d\u5b9a\u3057\u3066\u3044\u307e\u3059\u3002<\/li>\n\n\n\n<li>\u7570\u5e38\u691c\u77e5\u306e\u305f\u3081\u306eUDF\u3092\u5b9a\u7fa9\u3057\u307e\u3059\u3002\u3053\u3053\u3067\u306f\u3001\u73fe\u5728\u306e\u5024\u304c\u79fb\u52d5\u5e73\u5747\u306e1.5\u500d\u3092\u8d85\u3048\u308b\u5834\u5408\u306b\u7570\u5e38\u3068\u307f\u306a\u3057\u3066\u3044\u307e\u3059\u3002<\/li>\n\n\n\n<li><code>join<\/code>\u30e1\u30bd\u30c3\u30c9\u3092\u4f7f\u3063\u3066\u3001\u30b9\u30c8\u30ea\u30fc\u30df\u30f3\u30b0\u30c7\u30fc\u30bf\u3068\u79fb\u52d5\u5e73\u5747\u3092\u7d50\u5408\u3057\u3001\u7570\u5e38\u691c\u77e5\u3092\u9069\u7528\u3057\u307e\u3059\u3002<\/li>\n\n\n\n<li>\u7570\u5e38\u304c\u691c\u77e5\u3055\u308c\u305f\u5834\u5408\u3001\u30bf\u30a4\u30e0\u30b9\u30bf\u30f3\u30d7\u3001\u73fe\u5728\u306e\u5024\u3001\u79fb\u52d5\u5e73\u5747\u3092\u9078\u629e\u3057\u307e\u3059\u3002<\/li>\n\n\n\n<li><code>writeStream<\/code>\u30e1\u30bd\u30c3\u30c9\u3092\u4f7f\u3063\u3066\u3001\u7570\u5e38\u691c\u77e5\u7d50\u679c\u3092\u30b3\u30f3\u30bd\u30fc\u30eb\u306b\u51fa\u529b\u3057\u307e\u3059\u3002<\/li>\n\n\n\n<li><code>awaitTermination<\/code>\u30e1\u30bd\u30c3\u30c9\u3092\u547c\u3073\u51fa\u3057\u3001\u30b9\u30c8\u30ea\u30fc\u30df\u30f3\u30b0\u30af\u30a8\u30ea\u304c\u7d42\u4e86\u3059\u308b\u307e\u3067\u5f85\u6a5f\u3057\u307e\u3059\u3002<\/li>\n<\/ol>\n\n\n\n<p>\u3053\u306e\u30b3\u30fc\u30c9\u4f8b\u3092\u5b9f\u884c\u3059\u308b\u524d\u306b\u3001\u30ed\u30fc\u30ab\u30eb\u30db\u30b9\u30c8\u306e\u6307\u5b9a\u3057\u305f\u30dd\u30fc\u30c8\u306b\u30b9\u30c8\u30ea\u30fc\u30df\u30f3\u30b0\u30c7\u30fc\u30bf\u3092\u9001\u4fe1\u3059\u308b\u5fc5\u8981\u304c\u3042\u308a\u307e\u3059\u3002\u30c7\u30fc\u30bf\u306e\u9001\u4fe1\u306b\u306f\u3001netcat\u306a\u3069\u306e\u30c4\u30fc\u30eb\u3092\u4f7f\u7528\u3067\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u4f8b\u3048\u3070\u3001\u4ee5\u4e0b\u306e\u3088\u304
6\u306b\u3057\u3066\u30c7\u30fc\u30bf\u3092\u9001\u4fe1\u3067\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">nc -lk 9999<\/pre>\n\n\n\n<p>\u305d\u306e\u5f8c\u3001\u30b3\u30f3\u30bd\u30fc\u30eb\u306b\u4ee5\u4e0b\u306e\u3088\u3046\u306a\u30c7\u30fc\u30bf\u3092\u5165\u529b\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">1623945600000,10.5\n1623945601000,11.2\n1623945602000,9.8\n1623945603000,25.6\n1623945604000,10.1<\/pre>\n\n\n\n<p>\u30b3\u30fc\u30c9\u4f8b\u3092\u5b9f\u884c\u3059\u308b\u3068\u3001\u7570\u5e38\u304c\u691c\u77e5\u3055\u308c\u305f\u5834\u5408\u306b\u30bf\u30a4\u30e0\u30b9\u30bf\u30f3\u30d7\u3001\u73fe\u5728\u306e\u5024\u3001\u79fb\u52d5\u5e73\u5747\u304c\u30b3\u30f3\u30bd\u30fc\u30eb\u306b\u51fa\u529b\u3055\u308c\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u5b9f\u969b\u306e\u30e6\u30fc\u30b9\u30b1\u30fc\u30b9\u3067\u306f\u3001\u30c7\u30fc\u30bf\u30bd\u30fc\u30b9\u3084\u30b9\u30ad\u30fc\u30de\u3001\u7570\u5e38\u691c\u77e5\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0\u3092\u9069\u5b9c\u5909\u66f4\u3057\u3001\u5fc5\u8981\u306b\u5fdc\u3058\u3066\u30a2\u30e9\u30fc\u30c8\u306e\u767a\u5831\u65b9\u6cd5\u3092\u8ffd\u52a0\u3057\u3066\u304f\u3060\u3055\u3044\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" 
id=\"i-24\">SNS\u30c7\u30fc\u30bf\u3092\u4f7f\u3063\u305f\u30a4\u30f3\u30d5\u30eb\u30a8\u30f3\u30b5\u30fc\u5206\u6790<\/h3>\n\n\n\n<p>SNS\u30c7\u30fc\u30bf\u3092\u5206\u6790\u3059\u308b\u3053\u3068\u3067\u3001\u5f71\u97ff\u529b\u306e\u3042\u308b\u30e6\u30fc\u30b6\u30fc\uff08\u30a4\u30f3\u30d5\u30eb\u30a8\u30f3\u30b5\u30fc\uff09\u3092\u7279\u5b9a\u3059\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002PySpark\u3092\u4f7f\u3046\u3053\u3068\u3067\u3001\u5927\u898f\u6a21\u306aSNS\u30c7\u30fc\u30bf\u3092\u52b9\u7387\u7684\u306b\u51e6\u7406\u3057\u3001\u30a4\u30f3\u30d5\u30eb\u30a8\u30f3\u30b5\u30fc\u5206\u6790\u3092\u884c\u3046\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u30a4\u30f3\u30d5\u30eb\u30a8\u30f3\u30b5\u30fc\u5206\u6790\u3067\u306f\u3001\u307e\u305aSNS\u30c7\u30fc\u30bf\uff08\u30c4\u30a4\u30fc\u30c8\u3001\u30d5\u30a9\u30ed\u30fc\u95a2\u4fc2\u306a\u3069\uff09\u3092\u53ce\u96c6\u3057\u3001\u524d\u51e6\u7406\u3092\u884c\u3044\u307e\u3059\u3002\u6b21\u306b\u3001\u30e6\u30fc\u30b6\u30fc\u3092\u30ce\u30fc\u30c9\u3001\u30d5\u30a9\u30ed\u30fc\u95a2\u4fc2\u3092\u30a8\u30c3\u30b8\u3068\u3057\u305f\u30b0\u30e9\u30d5\u3092\u69cb\u7bc9\u3057\u307e\u3059\u3002\u6700\u5f8c\u306b\u3001PageRank\u306a\u3069\u306e\u30b0\u30e9\u30d5\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0\u3092\u9069\u7528\u3057\u3001\u5f71\u97ff\u529b\u306e\u9ad8\u3044\u30e6\u30fc\u30b6\u30fc\u3092\u7279\u5b9a\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<p>PySpark\u3067\u306f\u3001GraphFrames\u3092\u4f7f\u3046\u3053\u3068\u3067\u3001\u5927\u898f\u6a21\u306a\u30b0\u30e9\u30d5\u30c7\u30fc\u30bf\u3092\u51e6\u7406\u3059\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002\u307e\u305f\u3001GraphFrames\u306b\u306f\u3001PageRank\u3092\u306f\u3058\u3081\u3068\u3059\u308b\u69d8\u3005\u306a\u30b0\u30e9\u30d5\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0\u304c\u5b9f\u88c5\u3055\u308c\u3066\u304a\u308a\u3001\u7c21\u5358\u306b\u30a4\u30f3\u30d5\u30eb\u30a8\u30f3\u30b5\u30fc\u5206\u6790\u3092\u884c\u3046\u3053\u3068\u304c\u3
067\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<p>PySpark\u3068GraphFrames\u3092\u4f7f\u3063\u305fSNS\u30c7\u30fc\u30bf\u306e\u30a4\u30f3\u30d5\u30eb\u30a8\u30f3\u30b5\u30fc\u5206\u6790\u306e\u30b5\u30f3\u30d7\u30eb\u30b3\u30fc\u30c9\u3092\u4ee5\u4e0b\u306b\u793a\u3057\u307e\u3059\u3002\u3053\u306e\u30b3\u30fc\u30c9\u4f8b\u3067\u306f\u3001\u30e6\u30fc\u30b6\u30fc\u306e\u30d5\u30a9\u30ed\u30fc\u95a2\u4fc2\u304b\u3089\u30b0\u30e9\u30d5\u3092\u69cb\u7bc9\u3057\u3001PageRank\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0\u3092\u9069\u7528\u3057\u3066\u5f71\u97ff\u529b\u306e\u9ad8\u3044\u30e6\u30fc\u30b6\u30fc\u3092\u7279\u5b9a\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">from pyspark.sql import SparkSession\nfrom graphframes import GraphFrame\n\n# SparkSession\u306e\u4f5c\u6210\nspark = SparkSession.builder \\\n    .appName(\"Influencer Analysis\") \\\n    .getOrCreate()\n\n# \u30b5\u30f3\u30d7\u30eb\u30c7\u30fc\u30bf\u306e\u4f5c\u6210\nusers = spark.createDataFrame([\n    (\"u1\", \"Alice\"),\n    (\"u2\", \"Bob\"),\n    (\"u3\", \"Carol\"),\n    (\"u4\", \"Dave\"),\n    (\"u5\", \"Eve\")\n], [\"id\", \"name\"])\n\nfollows = spark.createDataFrame([\n    (\"u1\", \"u2\"),\n    (\"u1\", \"u3\"),\n    (\"u2\", \"u3\"),\n    (\"u3\", \"u4\"),\n    (\"u4\", \"u5\")\n], [\"src\", \"dst\"])\n\n# GraphFrame\u306e\u4f5c\u6210\ngraph = GraphFrame(users, follows)\n\n# PageRank\u306e\u8a08\u7b97\npage_rank = graph.pageRank(resetProbability=0.15, maxIter=10)\n\n# \u7d50\u679c\u306e\u8868\u793a\npage_rank.vertices.orderBy(page_rank.vertices.pagerank.desc()).show()\n\n# \u30b3\u30df\u30e5\u30cb\u30c6\u30a3\u691c\u51fa\ncommunities = graph.labelPropagation(maxIter=10)\n\n# 
\u30b3\u30df\u30e5\u30cb\u30c6\u30a3\u3054\u3068\u306e\u5f71\u97ff\u529b\u306e\u9ad8\u3044\u30e6\u30fc\u30b6\u30fc\u3092\u8868\u793a\ncommunities.vertices \\\n    .join(page_rank.vertices, on=\"id\") \\\n    .groupBy(\"label\") \\\n    .agg({\"pagerank\": \"max\"}) \\\n    .withColumnRenamed(\"max(pagerank)\", \"max_pagerank\") \\\n    .join(communities.vertices, on=[communities.vertices.label == communities.label,\n                                     communities.vertices.pagerank == communities.max_pagerank]) \\\n    .select(communities.label, communities.vertices.name, communities.vertices.pagerank) \\\n    .orderBy(communities.label.asc()) \\\n    .show()\n\n# SparkSession\u306e\u505c\u6b62\nspark.stop()<\/pre>\n\n\n\n<p>\u3053\u306e\u30b3\u30fc\u30c9\u4f8b\u3067\u306f\u3001\u4ee5\u4e0b\u306e\u51e6\u7406\u3092\u884c\u3063\u3066\u3044\u307e\u3059\u3002<\/p>\n\n\n\n<ol class=\"wp-block-list\">\n<li><code>SparkSession<\/code>\u3092\u4f5c\u6210\u3057\u3001Spark\u30a2\u30d7\u30ea\u30b1\u30fc\u30b7\u30e7\u30f3\u3092\u521d\u671f\u5316\u3057\u307e\u3059\u3002<\/li>\n\n\n\n<li>\u30b5\u30f3\u30d7\u30eb\u30c7\u30fc\u30bf\u3092\u4f5c\u6210\u3057\u307e\u3059\u3002\u3053\u3053\u3067\u306f\u3001\u30e6\u30fc\u30b6\u30fc\u3068\u30d5\u30a9\u30ed\u30fc\u95a2\u4fc2\u306e\u30c7\u30fc\u30bf\u3092<code>createDataFrame<\/code>\u30e1\u30bd\u30c3\u30c9\u3067\u4f5c\u6210\u3057\u3066\u3044\u307e\u3059\u3002<\/li>\n\n\n\n<li><code>GraphFrame<\/code>\u3092\u4f5c\u6210\u3057\u307e\u3059\u3002\u30e6\u30fc\u30b6\u30fc\u30c7\u30fc\u30bf\u3092\u30ce\u30fc\u30c9\u3001\u30d5\u30a9\u30ed\u30fc\u95a2\u4fc2\u30c7\u30fc\u30bf\u3092\u30a8\u30c3\u30b8\u3068\u3057\u3066\u30b0\u30e9\u30d5\u3092\u69cb\u7bc9\u3057\u307e\u3059\u3002<\/li>\n\n\n\n<li><code>pageRank<\/code>\u30e1\u30bd\u30c3\u30c9\u3092\u4f7f\u3063\u3066\u3001PageRank\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0\u3092\u9069\u7528\u3057\u3001\u5404\u30e6\u30fc\u30b6\u30fc\u306e\u5f71\u97ff\u529b\uff08PageRank\u30b9\u30b3\u30a2\uff09\u3092\u8a08\u7b97\u30
57\u307e\u3059\u3002<\/li>\n\n\n\n<li>PageRank\u30b9\u30b3\u30a2\u306e\u9ad8\u3044\u9806\u306b\u30e6\u30fc\u30b6\u30fc\u3092\u8868\u793a\u3057\u307e\u3059\u3002<\/li>\n\n\n\n<li><code>labelPropagation<\/code>\u30e1\u30bd\u30c3\u30c9\u3092\u4f7f\u3063\u3066\u3001\u30b3\u30df\u30e5\u30cb\u30c6\u30a3\u691c\u51fa\u3092\u884c\u3044\u307e\u3059\u3002\u3053\u3053\u3067\u306f\u3001Label Propagation\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0\u3092\u4f7f\u7528\u3057\u3066\u3044\u307e\u3059\u3002<\/li>\n\n\n\n<li>\u30b3\u30df\u30e5\u30cb\u30c6\u30a3\u3054\u3068\u306b\u5f71\u97ff\u529b\u306e\u9ad8\u3044\u30e6\u30fc\u30b6\u30fc\u3092\u7279\u5b9a\u3057\u3001\u8868\u793a\u3057\u307e\u3059\u3002<\/li>\n\n\n\n<li>\u6700\u5f8c\u306b\u3001<code>spark.stop()<\/code>\u3067<code>SparkSession<\/code>\u3092\u505c\u6b62\u3057\u3001\u30ea\u30bd\u30fc\u30b9\u3092\u89e3\u653e\u3057\u307e\u3059\u3002<\/li>\n<\/ol>\n\n\n\n<p>\u3053\u306e\u30b3\u30fc\u30c9\u4f8b\u3092\u5b9f\u884c\u3059\u308b\u3068\u3001\u5168\u4f53\u306e\u5f71\u97ff\u529b\u306e\u9ad8\u3044\u30e6\u30fc\u30b6\u30fc\u3068\u30b3\u30df\u30e5\u30cb\u30c6\u30a3\u3054\u3068\u306e\u5f71\u97ff\u529b\u306e\u9ad8\u3044\u30e6\u30fc\u30b6\u30fc\u304c\u8868\u793a\u3055\u308c\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u5b9f\u969b\u306eSNS\u30c7\u30fc\u30bf\u3092\u4f7f\u3046\u5834\u5408\u306f\u3001\u30c7\u30fc\u30bf\u306e\u53ce\u96c6\u3068\u524d\u51e6\u7406\u306e\u90e8\u5206\u3092\u9069\u5b9c\u5909\u66f4\u3057\u3001\u5fc5\u8981\u306b\u5fdc\u3058\u3066\u30b0\u30e9\u30d5\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0\u306e\u30d1\u30e9\u30e1\u30fc\u30bf\u3092\u8abf\u6574\u3057\u3066\u304f\u3060\u3055\u3044\u3002\u307e\u305f\u3001\u53ef\u8996\u5316\u30c4\u30fc\u30eb\u3068\u7d44\u307f\u5408\u308f\u305b\u308b\u3053\u3068\u3067\u3001\u30a4\u30f3\u30d5\u30eb\u30a8\u30f3\u30b5\u30fc\u306e\u30cd\u30c3\u30c8\u30ef\u30fc\u30af\u69cb\u9020\u3092\u308f\u304b\u308a\u3084\u3059\u304f\u53ef\u8996\u5316\u3059\u308b\u3053\u3068\u3082\u3067\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u4ee5\u4e0a\
u3001PySpark\u3092\u4f7f\u3063\u305f5\u3064\u306e\u5b9f\u8df5\u7684\u306a\u30e6\u30fc\u30b9\u30b1\u30fc\u30b9\u3092\u7d39\u4ecb\u3057\u307e\u3057\u305f\u3002PySpark\u306f\u3001\u5927\u898f\u6a21\u30c7\u30fc\u30bf\u51e6\u7406\u306b\u9069\u3057\u305f\u30d5\u30ec\u30fc\u30e0\u30ef\u30fc\u30af\u3067\u3042\u308a\u3001\u69d8\u3005\u306a\u9818\u57df\u3067\u6d3b\u7528\u3059\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002\u6b21\u7ae0\u3067\u306f\u3001PySpark\u306e\u30a8\u30b3\u30b7\u30b9\u30c6\u30e0\u3068\u904b\u7528\u3001\u9ad8\u5ea6\u306a\u8a71\u984c\u306b\u3064\u3044\u3066\u89e3\u8aac\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<h2 class=\"wp-block-heading\" id=\"i-25\">PySpark\u306e\u30a8\u30b3\u30b7\u30b9\u30c6\u30e0\u3068\u904b\u7528\u3001\u9ad8\u5ea6\u306a\u8a71\u984c<\/h2>\n\n\n\n<p>PySpark\u306f\u3001\u5358\u72ec\u3067\u4f7f\u7528\u3059\u308b\u3060\u3051\u3067\u306a\u304f\u3001\u69d8\u3005\u306a\u30c4\u30fc\u30eb\u3084\u30d5\u30ec\u30fc\u30e0\u30ef\u30fc\u30af\u3068\u9023\u643a\u3059\u308b\u3053\u3068\u3067\u3001\u3088\u308a\u9ad8\u5ea6\u306a\u30c7\u30fc\u30bf\u51e6\u7406\u3092\u5b9f\u73fe\u3067\u304d\u307e\u3059\u3002\u3053\u3053\u3067\u306f\u3001PySpark\u306e\u30a8\u30b3\u30b7\u30b9\u30c6\u30e0\u3084\u904b\u7528\u306b\u95a2\u3059\u308b\u8a71\u984c\u3001\u304a\u3088\u3073PySpark\u3092\u4f7f\u3063\u305f\u9ad8\u5ea6\u306a\u30c7\u30fc\u30bf\u51e6\u7406\u306b\u3064\u3044\u3066\u89e3\u8aac\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" 
id=\"i-26\">Jupyter\u3084Zeppelin\u306a\u3069\u306e\u30ce\u30fc\u30c8\u30d6\u30c3\u30af\u74b0\u5883\u3068\u306e\u9023\u643a\u65b9\u6cd5<\/h3>\n\n\n\n<p>PySpark\u306f\u3001Jupyter\u3084Zeppelin\u306a\u3069\u306e\u30ce\u30fc\u30c8\u30d6\u30c3\u30af\u74b0\u5883\u3068\u9023\u643a\u3059\u308b\u3053\u3068\u3067\u3001\u30a4\u30f3\u30bf\u30e9\u30af\u30c6\u30a3\u30d6\u306b\u30c7\u30fc\u30bf\u51e6\u7406\u3092\u884c\u3046\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002\u3053\u308c\u3089\u306e\u30ce\u30fc\u30c8\u30d6\u30c3\u30af\u74b0\u5883\u3067\u306f\u3001PySpark kernel\u3092\u8a2d\u5b9a\u3059\u308b\u3053\u3068\u3067\u3001PySpark\u30b3\u30fc\u30c9\u3092\u76f4\u63a5\u5b9f\u884c\u3067\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u30ce\u30fc\u30c8\u30d6\u30c3\u30af\u74b0\u5883\u3092\u4f7f\u3046\u3053\u3068\u3067\u3001\u30c7\u30fc\u30bf\u306e\u53ef\u8996\u5316\u3084\u63a2\u7d22\u7684\u30c7\u30fc\u30bf\u5206\u6790\u304c\u3057\u3084\u3059\u304f\u306a\u308a\u307e\u3059\u3002\u307e\u305f\u3001\u30b3\u30fc\u30c9\u3068\u5b9f\u884c\u7d50\u679c\u3092\u540c\u3058\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u5185\u3067\u7ba1\u7406\u3067\u304d\u308b\u305f\u3081\u3001\u958b\u767a\u306e\u52b9\u7387\u304c\u5411\u4e0a\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-27\">Airflow\u3092\u4f7f\u3063\u305f\u30ef\u30fc\u30af\u30d5\u30ed\u30fc\u7ba1\u7406<\/h3>\n\n\n\n<p>Airflow\u306f\u3001\u30c7\u30fc\u30bf\u30d1\u30a4\u30d7\u30e9\u30a4\u30f3\u3084\u30ef\u30fc\u30af\u30d5\u30ed\u30fc\u3092\u7ba1\u7406\u3059\u308b\u305f\u3081\u306e\u30aa\u30fc\u30d7\u30f3\u30bd\u30fc\u30b9\u30d7\u30e9\u30c3\u30c8\u30d5\u30a9\u30fc\u30e0\u3067\u3059\u3002PySpark\u3092Airflow\u3068\u7d44\u307f\u5408\u308f\u305b\u308b\u3053\u3068\u3067\u3001\u8907\u96d1\u306a\u30c7\u30fc\u30bf\u51e6\u7406\u30ef\u30fc\u30af\u30d5\u30ed\u30fc\u3092\u52b9\u7387\u7684\u306b\u7ba1\u7406\u3067\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<p>Airflow\u3067\u306f\u3001PySpark 
Operator\u3092\u4f7f\u7528\u3057\u3066PySpark\u30b8\u30e7\u30d6\u3092\u5b9f\u884c\u3067\u304d\u307e\u3059\u3002\u307e\u305f\u3001\u30ef\u30fc\u30af\u30d5\u30ed\u30fc\u306e\u30b9\u30b1\u30b8\u30e5\u30fc\u30ea\u30f3\u30b0\u3084\u76e3\u8996\u3001\u5931\u6557\u6642\u306e\u518d\u8a66\u884c\u306a\u3069\u306e\u6a5f\u80fd\u3082\u63d0\u4f9b\u3055\u308c\u3066\u3044\u307e\u3059\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-28\">Kafka\u306a\u3069\u4ed6\u306e\u30c7\u30fc\u30bf\u30d1\u30a4\u30d7\u30e9\u30a4\u30f3\u30c4\u30fc\u30eb\u3068\u306e\u9023\u643a<\/h3>\n\n\n\n<p>PySpark\u306f\u3001Kafka\u306a\u3069\u306e\u4ed6\u306e\u30c7\u30fc\u30bf\u30d1\u30a4\u30d7\u30e9\u30a4\u30f3\u30c4\u30fc\u30eb\u3068\u9023\u643a\u3059\u308b\u3053\u3068\u3067\u3001\u30ea\u30a2\u30eb\u30bf\u30a4\u30e0\u30c7\u30fc\u30bf\u51e6\u7406\u3092\u5b9f\u73fe\u3067\u304d\u307e\u3059\u3002Apache Kafka\u306f\u5206\u6563\u30e1\u30c3\u30bb\u30fc\u30b8\u30f3\u30b0\u30b7\u30b9\u30c6\u30e0\u3067\u3042\u308a\u3001PySpark\u304b\u3089Kafka\u306e\u30c7\u30fc\u30bf\u3092\u8aad\u307f\u8fbc\u3093\u3067\u51e6\u7406\u3059\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<p>Structured Streaming\u3092\u4f7f\u3046\u3053\u3068\u3067\u3001Kafka\u304b\u3089\u306e\u30b9\u30c8\u30ea\u30fc\u30df\u30f3\u30b0\u30c7\u30fc\u30bf\u3092\u7c21\u5358\u306b\u51e6\u7406\u3067\u304d\u307e\u3059\u3002\u307e\u305f\u3001Kafka Connect\u3084Kafka Streams\u306a\u3069\u306e\u30c4\u30fc\u30eb\u3092\u4f7f\u3046\u3053\u3068\u3067\u3001\u3088\u308a\u9ad8\u5ea6\u306a\u30b9\u30c8\u30ea\u30fc\u30df\u30f3\u30b0\u51e6\u7406\u3092\u5b9f\u73fe\u3067\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" 
id=\"i-29\">Pandas\u3068PySpark\u3092\u4f7f\u3044\u5206\u3051\u308b\u57fa\u6e96<\/h3>\n\n\n\n<p>Pandas\u3068PySpark\u306f\u3001\u3068\u3082\u306b\u30c7\u30fc\u30bf\u51e6\u7406\u306b\u4f7f\u7528\u3055\u308c\u308bPython\u30e9\u30a4\u30d6\u30e9\u30ea\u3067\u3059\u304c\u3001\u305d\u308c\u305e\u308c\u5f97\u610f\u3068\u3059\u308b\u9818\u57df\u304c\u7570\u306a\u308a\u307e\u3059\u3002Pandas\u306f\u3001\u5c0f\u301c\u4e2d\u898f\u6a21\u306e\u30c7\u30fc\u30bf\u3092\u9ad8\u901f\u306b\u51e6\u7406\u3059\u308b\u3053\u3068\u306b\u9069\u3057\u3066\u304a\u308a\u3001\u8c4a\u5bcc\u306a\u6a5f\u80fd\u3092\u63d0\u4f9b\u3057\u3066\u3044\u307e\u3059\u3002\u4e00\u65b9\u3001PySpark\u306f\u3001\u5927\u898f\u6a21\u30c7\u30fc\u30bf\u306e\u5206\u6563\u51e6\u7406\u306b\u9069\u3057\u3066\u304a\u308a\u3001\u30b9\u30b1\u30fc\u30e9\u30d3\u30ea\u30c6\u30a3\u304c\u9ad8\u3044\u3068\u3044\u3046\u7279\u5fb4\u304c\u3042\u308a\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u30c7\u30fc\u30bf\u30b5\u30a4\u30ba\u3084\u51e6\u7406\u306e\u8907\u96d1\u3055\u3001\u6c42\u3081\u3089\u308c\u308b\u51e6\u7406\u901f\u5ea6\u306a\u3069\u3092\u8003\u616e\u3057\u3066\u3001Pandas\u3068PySpark\u3092\u4f7f\u3044\u5206\u3051\u308b\u5fc5\u8981\u304c\u3042\u308a\u307e\u3059\u3002\u307e\u305f\u3001Pandas\u3068PySpark\u3092\u7d44\u307f\u5408\u308f\u305b\u3066\u4f7f\u7528\u3059\u308b\u3053\u3068\u3067\u3001\u305d\u308c\u305e\u308c\u306e\u9577\u6240\u3092\u6d3b\u304b\u3057\u305f\u30c7\u30fc\u30bf\u51e6\u7406\u304c\u53ef\u80fd\u3067\u3059\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-30\">PySpark\u3068Deep Learning\u306e\u7d44\u307f\u5408\u308f\u305b\u65b9<\/h3>\n\n\n\n<p>PySpark\u306f\u3001TensorFlow\u3084Keras\u306a\u3069\u306eDeep 
Learning\u30d5\u30ec\u30fc\u30e0\u30ef\u30fc\u30af\u3068\u7d44\u307f\u5408\u308f\u305b\u308b\u3053\u3068\u3067\u3001\u5927\u898f\u6a21\u306a\u6a5f\u68b0\u5b66\u7fd2\u30fb\u6df1\u5c64\u5b66\u7fd2\u3092\u5b9f\u73fe\u3067\u304d\u307e\u3059\u3002PySpark\u3092\u4f7f\u3063\u3066\u5927\u898f\u6a21\u30c7\u30fc\u30bf\u3092\u524d\u51e6\u7406\u3057\u3001TensorFlow\u3084Keras\u3092\u4f7f\u3063\u3066\u30e2\u30c7\u30eb\u306e\u5b66\u7fd2\u3092\u884c\u3046\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u307e\u305f\u3001Spark\u306eMLlib\u3068Deep Learning\u3092\u7d44\u307f\u5408\u308f\u305b\u308b\u3053\u3068\u3067\u3001\u7279\u5fb4\u91cf\u30a8\u30f3\u30b8\u30cb\u30a2\u30ea\u30f3\u30b0\u3068\u6df1\u5c64\u5b66\u7fd2\u3092\u7d44\u307f\u5408\u308f\u305b\u305f\u9ad8\u5ea6\u306a\u6a5f\u68b0\u5b66\u7fd2\u30d1\u30a4\u30d7\u30e9\u30a4\u30f3\u3092\u69cb\u7bc9\u3067\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-31\">PySpark\u3092\u30af\u30e9\u30a6\u30c9\u74b0\u5883\uff08AWS\u3001GCP\u3001Azure\uff09\u3067\u904b\u7528\u3059\u308b\u65b9\u6cd5<\/h3>\n\n\n\n<p>PySpark\u306f\u3001AWS\u3001GCP\u3001Azure\u306a\u3069\u306e\u30af\u30e9\u30a6\u30c9\u74b0\u5883\u3067\u904b\u7528\u3059\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002\u30af\u30e9\u30a6\u30c9\u74b0\u5883\u3067\u306f\u3001\u30aa\u30f3\u30c7\u30de\u30f3\u30c9\u3067\u30b3\u30f3\u30d4\u30e5\u30fc\u30c6\u30a3\u30f3\u30b0\u30ea\u30bd\u30fc\u30b9\u3092\u8abf\u9054\u3067\u304d\u308b\u305f\u3081\u3001\u5927\u898f\u6a21\u30c7\u30fc\u30bf\u51e6\u7406\u306b\u9069\u3057\u3066\u3044\u307e\u3059\u3002<\/p>\n\n\n\n<p>AWS\u3067\u306fEMR\u3001GCP\u3067\u306fDataproc\u3001Azure\u3067\u306fHDInsight\u3092\u4f7f\u7528\u3057\u3066\u3001PySpark\u30af\u30e9\u30b9\u30bf\u3092\u69cb\u7bc9\u30fb\u7ba1\u7406\u3067\u304d\u307e\u3059\u3002\u30af\u30e9\u30a6\u30c9\u74b0\u5883\u3067PySpark\u3092\u904b\u7528\u3059\u308b\u969b\u306f\u3001\u30af\u30e9\u30b9\u30bf\u69cb\u6210\u306e\u6700\u9069\u5316\u3001\u30c7\u30fc\u30bf\u3
06e\u7ba1\u7406\u3001\u30b3\u30b9\u30c8\u524a\u6e1b\u306e\u305f\u3081\u306e\u30d9\u30b9\u30c8\u30d7\u30e9\u30af\u30c6\u30a3\u30b9\u306b\u7559\u610f\u3059\u308b\u5fc5\u8981\u304c\u3042\u308a\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u4ee5\u4e0a\u3001PySpark\u306e\u30a8\u30b3\u30b7\u30b9\u30c6\u30e0\u3068\u904b\u7528\u3001\u9ad8\u5ea6\u306a\u8a71\u984c\u306b\u3064\u3044\u3066\u89e3\u8aac\u3057\u307e\u3057\u305f\u3002PySpark\u306f\u3001\u69d8\u3005\u306a\u30c4\u30fc\u30eb\u3084\u30d5\u30ec\u30fc\u30e0\u30ef\u30fc\u30af\u3068\u9023\u643a\u3059\u308b\u3053\u3068\u3067\u3001\u3088\u308a\u9ad8\u5ea6\u3067\u8907\u96d1\u306a\u30c7\u30fc\u30bf\u51e6\u7406\u3092\u5b9f\u73fe\u3067\u304d\u307e\u3059\u3002\u307e\u305f\u3001\u30af\u30e9\u30a6\u30c9\u74b0\u5883\u3067\u306e\u904b\u7528\u306b\u3088\u308a\u3001\u5927\u898f\u6a21\u30c7\u30fc\u30bf\u51e6\u7406\u306b\u5bfe\u5fdc\u3067\u304d\u307e\u3059\u3002PySpark\u3092\u6d3b\u7528\u3059\u308b\u3053\u3068\u3067\u3001\u30d3\u30c3\u30b0\u30c7\u30fc\u30bf\u51e6\u7406\u306e\u53ef\u80fd\u6027\u304c\u5927\u304d\u304f\u5e83\u304c\u308b\u3067\u3057\u3087\u3046\u3002<\/p>\n\n\n\n<h2 class=\"wp-block-heading\" id=\"i-32\">\u307e\u3068\u3081<\/h2>\n\n\n\n<p>\u672c\u8a18\u4e8b\u3067\u306f\u3001PySpark\u521d\u5fc3\u8005\u306e\u305f\u3081\u306b\u3001PySpark\u306e\u57fa\u790e\u304b\u3089\u5b9f\u8df5\u7684\u306a\u30e6\u30fc\u30b9\u30b1\u30fc\u30b9\u3001\u30a8\u30b3\u30b7\u30b9\u30c6\u30e0\u307e\u3067\u3001\u5e45\u5e83\u304f\u89e3\u8aac\u3057\u307e\u3057\u305f\u3002<\/p>\n\n\n\n<p>PySpark\u306f\u3001Apache Spark\u306e Python 
API\u3067\u3042\u308a\u3001\u30d3\u30c3\u30b0\u30c7\u30fc\u30bf\u51e6\u7406\u306b\u9069\u3057\u305f\u30d5\u30ec\u30fc\u30e0\u30ef\u30fc\u30af\u3067\u3059\u3002Python\u3068Spark\u3092\u7d44\u307f\u5408\u308f\u305b\u308b\u3053\u3068\u3067\u3001Python\u306e\u8c4a\u5bcc\u306a\u30e9\u30a4\u30d6\u30e9\u30ea\u3084\u30c4\u30fc\u30eb\u3092\u6d3b\u7528\u3057\u306a\u304c\u3089\u3001\u5927\u898f\u6a21\u30c7\u30fc\u30bf\u306e\u51e6\u7406\u304c\u53ef\u80fd\u306b\u306a\u308a\u307e\u3059\u3002<\/p>\n\n\n\n<p>PySpark\u3092\u4f7f\u3044\u3053\u306a\u3059\u306b\u306f\u3001RDD\u3084DataFrame\u306a\u3069\u306e\u57fa\u672c\u7684\u306a\u30c7\u30fc\u30bf\u69cb\u9020\u3084\u64cd\u4f5c\u3001Spark\u306e\u5185\u90e8\u52d5\u4f5c\u3001\u6a5f\u68b0\u5b66\u7fd2\u30e9\u30a4\u30d6\u30e9\u30ea\u306e\u4f7f\u3044\u65b9\u306a\u3069\u3001\u69d8\u3005\u306a\u77e5\u8b58\u304c\u5fc5\u8981\u3067\u3059\u3002\u307e\u305f\u3001\u5b9f\u969b\u306e\u30c7\u30fc\u30bf\u51e6\u7406\u3067\u306f\u3001ETL\u51e6\u7406\u3001\u30e6\u30fc\u30b6\u30fc\u884c\u52d5\u5206\u6790\u3001\u5546\u54c1\u30ec\u30b3\u30e1\u30f3\u30c9\u3001\u7570\u5e38\u691c\u77e5\u3001\u30a4\u30f3\u30d5\u30eb\u30a8\u30f3\u30b5\u30fc\u5206\u6790\u306a\u3069\u3001\u591a\u5c90\u306b\u308f\u305f\u308b\u30e6\u30fc\u30b9\u30b1\u30fc\u30b9\u304c\u5b58\u5728\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u3055\u3089\u306b\u3001PySpark\u3092\u3088\u308a\u52b9\u679c\u7684\u306b\u6d3b\u7528\u3059\u308b\u306b\u306f\u3001Jupyter\u3084Zeppelin\u306a\u3069\u306e\u30ce\u30fc\u30c8\u30d6\u30c3\u30af\u74b0\u5883\u3001Airflow\u306b\u3088\u308b\u30ef\u30fc\u30af\u30d5\u30ed\u30fc\u7ba1\u7406\u3001Kafka\u306a\u3069\u306e\u4ed6\u306e\u30c7\u30fc\u30bf\u30d1\u30a4\u30d7\u30e9\u30a4\u30f3\u30c4\u30fc\u30eb\u3068\u306e\u9023\u643a\u3001\u30af\u30e9\u30a6\u30c9\u74b0\u5883\u3067\u306e\u904b\u7528\u306a\u3069\u3001\u30a8\u30b3\u30b7\u30b9\u30c6\u30e0\u3084\u904b\u7528\u9762\u306e\u77e5\u8b58\u3082\u91cd\u8981\u3067\u3059\u3002<\/p>\n\n\n\n<p>PySpark\u306f\u3001\u30d3\u30c3\u30b0\u30c7\u30
fc\u30bf\u51e6\u7406\u306e\u5206\u91ce\u3067\u5927\u304d\u306a\u53ef\u80fd\u6027\u3092\u79d8\u3081\u3066\u3044\u307e\u3059\u3002\u672c\u8a18\u4e8b\u3067\u7d39\u4ecb\u3057\u305f\u5185\u5bb9\u3092\u53c2\u8003\u306b\u3001PySpark\u306e\u5b66\u7fd2\u3092\u9032\u3081\u3001\u30c7\u30fc\u30bf\u30a8\u30f3\u30b8\u30cb\u30a2\u30ea\u30f3\u30b0\u3084\u30c7\u30fc\u30bf\u30b5\u30a4\u30a8\u30f3\u30b9\u306e\u30b9\u30ad\u30eb\u3092\u78e8\u3044\u3066\u3044\u305f\u3060\u3051\u308c\u3070\u5e78\u3044\u3067\u3059\u3002PySpark\u3092\u4f7f\u3044\u3053\u306a\u3059\u3053\u3068\u3067\u3001\u30d3\u30c3\u30b0\u30c7\u30fc\u30bf\u6642\u4ee3\u306e\u30d3\u30b8\u30cd\u30b9\u306b\u5927\u304d\u304f\u8ca2\u732e\u3067\u304d\u308b\u3067\u3057\u3087\u3046\u3002<\/p>\n","protected":false},"excerpt":{"rendered":"<p>Warning: Undefined array key &#8220;is_admin&#8221; in \/home\/c7479301\/public_html\/chocottopro.com\/wp-content\/themes\/sango- &#8230; <\/p>\n","protected":false},"author":1,"featured_media":467,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[4],"tags":[],"class_list":{"0":"post-359","1":"post","2":"type-post","3":"status-publish","4":"format-standard","5":"has-post-thumbnail","7":"category-python"},"_links":{"self":[{"href":"https:\/\/chocottopro.com\/index.php?rest_route=\/wp\/v2\/posts\/359","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/chocottopro.com\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/chocottopro.com\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/chocottopro.com\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/chocottopro.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=359"}],"version-history":[{"count":7,"href":"https:\/\/chocottopro.com\/index.php?rest_route=\/wp\/v2\/posts\/359\/revisions"}],"predecessor-version":[{"id":446,"href":"https:\/\/chocottopro.com
\/index.php?rest_route=\/wp\/v2\/posts\/359\/revisions\/446"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/chocottopro.com\/index.php?rest_route=\/wp\/v2\/media\/467"}],"wp:attachment":[{"href":"https:\/\/chocottopro.com\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=359"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/chocottopro.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=359"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/chocottopro.com\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=359"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}