{"id":685,"date":"2024-05-04T23:29:12","date_gmt":"2024-05-04T14:29:12","guid":{"rendered":"https:\/\/chocottopro.com\/?p=685"},"modified":"2024-05-04T23:29:12","modified_gmt":"2024-05-04T14:29:12","slug":"dask%e5%85%a5%e9%96%80%ef%bc%9apython%e3%81%a7%e5%a4%a7%e8%a6%8f%e6%a8%a1%e3%83%87%e3%83%bc%e3%82%bf%e3%81%ae%e9%ab%98%e9%80%9f%e5%87%a6%e7%90%86%e3%82%92%e5%ae%9f%e7%8f%be%e3%81%99%e3%82%8b%e4%b8%a6","status":"publish","type":"post","link":"https:\/\/chocottopro.com\/?p=685","title":{"rendered":"Dask\u5165\u9580\uff1aPython\u3067\u5927\u898f\u6a21\u30c7\u30fc\u30bf\u306e\u9ad8\u901f\u51e6\u7406\u3092\u5b9f\u73fe\u3059\u308b\u4e26\u5217\u51e6\u7406\u30e9\u30a4\u30d6\u30e9\u30ea"},"content":{"rendered":"\n<p>Python\u3067\u30c7\u30fc\u30bf\u51e6\u7406\u3084\u6a5f\u68b0\u5b66\u7fd2\u3092\u884c\u3046\u969b\u3001\u5927\u898f\u6a21\u30c7\u30fc\u30bf\u3092\u9ad8\u901f\u306b\u51e6\u7406\u3059\u308b\u5fc5\u8981\u306b\u8feb\u3089\u308c\u308b\u3053\u3068\u304c\u3042\u308a\u307e\u3059\u3002\u305d\u3093\u306a\u6642\u3001\u4e26\u5217\u51e6\u7406\u30e9\u30a4\u30d6\u30e9\u30ea\u300cDask\u300d\u304c\u5f37\u529b\u306a\u52a9\u3063\u4eba\u3068\u306a\u308a\u307e\u3059\u3002\u672c\u8a18\u4e8b\u3067\u306f\u3001Dask\u306e\u57fa\u672c\u7684\u306a\u4f7f\u3044\u65b9\u304b\u3089\u5b9f\u8df5\u7684\u306a\u30c6\u30af\u30cb\u30c3\u30af\u3001\u6d3b\u7528\u4e8b\u4f8b\u307e\u3067\u8a73\u3057\u304f\u89e3\u8aac\u3057\u307e\u3059\u3002Dask\u3092\u4f7f\u3044\u3053\u306a\u3057\u3066\u3001Python\u3067\u306e\u30c7\u30fc\u30bf\u51e6\u7406\u3068\u6a5f\u68b0\u5b66\u7fd2\u3092\u52b9\u7387\u5316\u3057\u307e\u3057\u3087\u3046\uff01<\/p>\n\n\n\n<div class=\"wp-block-sgb-block-simple sgb-box-simple sgb-box-simple--title-normal sgb-box-simple--with-border\"><div style=\"background-color:var(--wp--preset--color--sango-main);color:#FFF\" class=\"sgb-box-simple__title\">\u3053\u306e\u8a18\u4e8b\u3092\u8aad\u3093\u3060\u3089\u308f\u304b\u308b\u3053\u3068<\/div><div class=\"sgb-box-simple__body\" 
style=\"border-color:var(--wp--preset--color--sango-main);background-color:#FFF\">\n<ul class=\"wp-block-list\">\n<li>Dask\u306e\u57fa\u672c\u7684\u306a\u4f7f\u3044\u65b9\u3068\u4e26\u5217\u51e6\u7406\u306e\u4ed5\u7d44\u307f <\/li>\n\n\n\n<li>Dask Array\u3001Dask DataFrame\u3001Dask Delayed\u306e\u6d3b\u7528\u65b9\u6cd5 <\/li>\n\n\n\n<li>\u30c1\u30e3\u30f3\u30af\u30b5\u30a4\u30ba\u3084\u30d1\u30fc\u30c6\u30a3\u30b7\u30e7\u30f3\u6570\u306e\u6700\u9069\u5316\u306b\u3088\u308b\u30d1\u30d5\u30a9\u30fc\u30de\u30f3\u30b9\u6539\u5584 <\/li>\n\n\n\n<li>\u6a5f\u68b0\u5b66\u7fd2\u3084ETL\u30d1\u30a4\u30d7\u30e9\u30a4\u30f3\u3067\u306eDask\u306e\u5b9f\u8df5\u7684\u306a\u6d3b\u7528\u4e8b\u4f8b <\/li>\n\n\n\n<li>\u30af\u30e9\u30a6\u30c9\u74b0\u5883\u3067Dask\u3092\u5229\u7528\u3059\u308b\u65b9\u6cd5 <\/li>\n\n\n\n<li>Python\u30a8\u30f3\u30b8\u30cb\u30a2\u304cDask\u3092\u4f7f\u3046\u3079\u304d\u72b6\u6cc1\u3068\u5f97\u3089\u308c\u308b\u30e1\u30ea\u30c3\u30c8<\/li>\n<\/ul>\n<\/div><\/div>\n\n\n\n<div class=\"toc\">\n    <div id=\"toc_container\" class=\"sgb-toc--bullets js-smooth-scroll\" data-dialog-title=\"Table of Contents\">\n      <p class=\"toc_title\">\u76ee\u6b21 <\/p>\n      <ul class=\"toc_list\">  <li class=\"first\">    <a 
href=\"#i-0\">Dask\u3068\u306f\uff1fPython\u30e6\u30fc\u30b6\u30fc\u5fc5\u898b\u306e\u4e26\u5217\u51e6\u7406\u30e9\u30a4\u30d6\u30e9\u30ea<\/a>    <ul class=\"menu_level_1\">      <li class=\"first\">        <a href=\"#i-1\">Dask\u306e\u57fa\u672c\u6982\u5ff5\u3068\u7279\u5fb4<\/a>      <\/li>      <li>        <a href=\"#i-2\">\u306a\u305cDask\u304c\u6ce8\u76ee\u3092\u96c6\u3081\u3066\u3044\u308b\u306e\u304b<\/a>      <\/li>      <li class=\"last\">        <a href=\"#i-3\">Dask\u3092\u4f7f\u3046\u30e1\u30ea\u30c3\u30c8<\/a>      <\/li>    <\/ul>  <\/li>  <li>    <a href=\"#i-4\">\u30b7\u30f3\u30d7\u30eb\u306a\u30b5\u30f3\u30d7\u30eb\u30b3\u30fc\u30c9\u3067Dask\u306e\u4f7f\u3044\u65b9\u3092\u7406\u89e3\u3057\u3088\u3046<\/a>    <ul class=\"menu_level_1\">      <li class=\"first\">        <a href=\"#i-5\">Dask\u306e\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u65b9\u6cd5<\/a>      <\/li>      <li>        <a href=\"#i-6\">Dask\u3092\u4f7f\u3063\u305f\u4e26\u5217\u51e6\u7406\u306e\u57fa\u672c\u7684\u306a\u66f8\u304d\u65b9<\/a>      <\/li>      <li class=\"last\">        <a href=\"#i-7\">Dask\u3092\u4f7f\u3046\u3053\u3068\u3067\u3069\u308c\u304f\u3089\u3044\u51e6\u7406\u304c\u901f\u304f\u306a\u308b\u306e\u304b\u691c\u8a3c<\/a>      <\/li>    <\/ul>  <\/li>  <li>    <a href=\"#i-8\">Dask\u3092\u4f7f\u3063\u305f\u4e26\u5217\u51e6\u7406\u306e\u30c6\u30af\u30cb\u30c3\u30af<\/a>    <ul class=\"menu_level_1\">      <li class=\"first\">        <a href=\"#i-9\">Dask\u306e\u30b9\u30b1\u30b8\u30e5\u30fc\u30e9\u30fc\u306e\u9078\u3073\u65b9<\/a>      <\/li>      <li>        <a href=\"#i-10\">Dask Array\u306e\u30c1\u30e3\u30f3\u30af\u30b5\u30a4\u30ba\u306e\u6700\u9069\u5316<\/a>      <\/li>      <li>        <a href=\"#i-11\">Dask DataFrame\u306e\u30d1\u30fc\u30c6\u30a3\u30b7\u30e7\u30f3\u6570\u306e\u6700\u9069\u5316<\/a>      <\/li>      <li class=\"last\">        <a href=\"#i-12\">Pandas\u3068Dask\u3092\u4f75\u7528\u3059\u308b\u65b9\u6cd5<\/a>      <\/li>    <\/ul>  <\/li>  <li>    <a 
href=\"#i-13\">Dask\u306e\u5b9f\u8df5\u7684\u306a\u30e6\u30fc\u30b9\u30b1\u30fc\u30b9\u3068\u5fdc\u7528\u4f8b<\/a>    <ul class=\"menu_level_1\">      <li class=\"first\">        <a href=\"#i-14\">\u6a5f\u68b0\u5b66\u7fd2\u3067\u306eDask\u306e\u6d3b\u7528\u65b9\u6cd5<\/a>      <\/li>      <li>        <a href=\"#i-15\">Dask\u3092\u4f7f\u3063\u305fETL\u30d1\u30a4\u30d7\u30e9\u30a4\u30f3\u306e\u69cb\u7bc9<\/a>      <\/li>      <li class=\"last\">        <a href=\"#i-16\">Dask\u3092\u30af\u30e9\u30a6\u30c9\u74b0\u5883\u3067\u5229\u7528\u3059\u308b\u65b9\u6cd5<\/a>      <\/li>    <\/ul>  <\/li>  <li class=\"last\">    <a href=\"#i-17\">\u307e\u3068\u3081\uff1aPython\u30a8\u30f3\u30b8\u30cb\u30a2\u5fc5\u643a\u306e\u30e9\u30a4\u30d6\u30e9\u30ea\u300cDask\u300d\u3092\u4f7f\u3044\u3053\u306a\u305d\u3046\uff01<\/a>  <\/li><\/ul>\n      \n    <\/div><\/div><h2 class=\"wp-block-heading\" id=\"i-0\">Dask\u3068\u306f\uff1fPython\u30e6\u30fc\u30b6\u30fc\u5fc5\u898b\u306e\u4e26\u5217\u51e6\u7406\u30e9\u30a4\u30d6\u30e9\u30ea<\/h2>\n\n\n\n<p>Dask\u306f\u3001Python\u3067\u5927\u898f\u6a21\u30c7\u30fc\u30bf\u306e\u4e26\u5217\u51e6\u7406\u3092\u884c\u3046\u305f\u3081\u306e\u5f37\u529b\u306a\u30e9\u30a4\u30d6\u30e9\u30ea\u3067\u3059\u3002NumPy\u3084Pandas\u306a\u3069\u306e\u4eba\u6c17\u30e9\u30a4\u30d6\u30e9\u30ea\u3068\u540c\u69d8\u306e\u30a4\u30f3\u30bf\u30fc\u30d5\u30a7\u30fc\u30b9\u3092\u63d0\u4f9b\u3057\u306a\u304c\u3089\u3001\u30e1\u30e2\u30ea\u306b\u8f09\u308a\u5207\u3089\u306a\u3044\u3088\u3046\u306a\u30d3\u30c3\u30b0\u30c7\u30fc\u30bf\u3092\u6271\u3046\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" 
id=\"i-1\">Dask\u306e\u57fa\u672c\u6982\u5ff5\u3068\u7279\u5fb4<\/h3>\n\n\n\n<p>Dask\u306e\u4e2d\u6838\u3068\u306a\u308b\u306e\u306f\u3001\u30bf\u30b9\u30af\u30b0\u30e9\u30d5\u3092\u7528\u3044\u305f\u4e26\u5217\u51e6\u7406\u306e\u4ed5\u7d44\u307f\u3067\u3059\u3002\u30bf\u30b9\u30af\u30b0\u30e9\u30d5\u306f\u3001\u5404\u30bf\u30b9\u30af\uff08\u51e6\u7406\u306e\u5358\u4f4d\uff09\u3092\u30ce\u30fc\u30c9\u3068\u3057\u3001\u30bf\u30b9\u30af\u9593\u306e\u4f9d\u5b58\u95a2\u4fc2\u3092\u30a8\u30c3\u30b8\u3067\u8868\u73fe\u3057\u305f\u30b0\u30e9\u30d5\u69cb\u9020\u3067\u3059\u3002Dask\u306f\u3001\u3053\u306e\u30bf\u30b9\u30af\u30b0\u30e9\u30d5\u3092\u81ea\u52d5\u7684\u306b\u69cb\u7bc9\u3057\u3001\u6700\u9069\u5316\u3084\u30b9\u30b1\u30b8\u30e5\u30fc\u30ea\u30f3\u30b0\u3092\u884c\u3046\u3053\u3068\u3067\u3001\u52b9\u7387\u7684\u306a\u4e26\u5217\u51e6\u7406\u3092\u5b9f\u73fe\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u307e\u305f\u3001Dask\u306f\u4ee5\u4e0b\u306e\u3088\u3046\u306a\u4e3b\u8981\u306a\u30b3\u30f3\u30dd\u30fc\u30cd\u30f3\u30c8\u3092\u63d0\u4f9b\u3057\u3066\u3044\u307e\u3059\u3002<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li>Dask Array: NumPy\u306e\u3088\u3046\u306aN\u6b21\u5143\u914d\u5217\u3092\u5206\u5272\u3057\u3001\u4e26\u5217\u51e6\u7406\u3092\u53ef\u80fd\u306b\u3059\u308b<\/li>\n\n\n\n<li>Dask DataFrame: Pandas\u306e\u30c7\u30fc\u30bf\u30d5\u30ec\u30fc\u30e0\u3092\u6a21\u5023\u3057\u3001\u5927\u898f\u6a21\u306a\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8\u3092\u6271\u3048\u308b<\/li>\n\n\n\n<li>Dask Bag: Python\u306eList\u3084Iterable\u3092\u4e26\u5217\u51e6\u7406\u3059\u308b\u305f\u3081\u306e\u30b3\u30ec\u30af\u30b7\u30e7\u30f3<\/li>\n\n\n\n<li>Dask Delayed: 
\u4efb\u610f\u306ePython\u95a2\u6570\u306e\u9045\u5ef6\u8a55\u4fa1\u3092\u53ef\u80fd\u306b\u3057\u3001\u4e26\u5217\u51e6\u7406\u3092\u5b9f\u73fe\u3059\u308b<\/li>\n<\/ul>\n\n\n\n<p>\u3053\u308c\u3089\u306e\u30b3\u30f3\u30dd\u30fc\u30cd\u30f3\u30c8\u3092\u4f7f\u3046\u3053\u3068\u3067\u3001NumPy\u3084Pandas\u3067\u306e\u51e6\u7406\u3092\u307b\u307c\u305d\u306e\u307e\u307eDask\u306b\u7f6e\u304d\u63db\u3048\u308b\u3053\u3068\u304c\u3067\u304d\u3001\u5c0e\u5165\u306e\u30cf\u30fc\u30c9\u30eb\u304c\u4f4e\u3044\u306e\u304c\u7279\u5fb4\u3067\u3059\u3002<\/p>\n\n\n\n<p>\u4ee5\u4e0b\u306f\u3001Dask Array\u3092\u4f7f\u3063\u305f\u4e26\u5217\u51e6\u7406\u306e\u7c21\u5358\u306a\u30b5\u30f3\u30d7\u30eb\u30b3\u30fc\u30c9\u3067\u3059\u3002<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">import dask.array as da\n\n# \u30c0\u30df\u30fc\u306e\u30c7\u30fc\u30bf\u3092\u4f5c\u6210\ndata = da.random.random((1000, 1000), chunks=(500, 500))\n\n# \u5206\u5272\u3055\u308c\u305f\u30c1\u30e3\u30f3\u30af\u4e0a\u3067\u4e26\u5217\u306b\u8a08\u7b97\u3092\u5b9f\u884c\nresult = 
data.sum().compute()\n\nprint(result)<\/pre>\n\n\n\n<p>\u4e0a\u8a18\u306e\u30b3\u30fc\u30c9\u3067\u306f\u3001<code>da.random.random<\/code>\u3092\u4f7f\u3063\u3066\u5927\u898f\u6a21\u306a\u30e9\u30f3\u30c0\u30e0\u30c7\u30fc\u30bf\u3092\u751f\u6210\u3057\u3001<code>chunks<\/code>\u5f15\u6570\u3067\u914d\u5217\u3092\u5206\u5272\u3059\u308b\u30b5\u30a4\u30ba\u3092\u6307\u5b9a\u3057\u3066\u3044\u307e\u3059\u3002\u305d\u3057\u3066\u3001<code>sum<\/code>\u30e1\u30bd\u30c3\u30c9\u3067\u5206\u5272\u3055\u308c\u305f\u914d\u5217\u306e\u5408\u8a08\u5024\u3092\u8a08\u7b97\u3057\u3066\u3044\u307e\u3059\u3002<code>compute<\/code>\u30e1\u30bd\u30c3\u30c9\u3092\u547c\u3073\u51fa\u3059\u3053\u3068\u3067\u3001\u5b9f\u969b\u306e\u8a08\u7b97\u304c\u4e26\u5217\u306b\u5b9f\u884c\u3055\u308c\u307e\u3059\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-2\">\u306a\u305cDask\u304c\u6ce8\u76ee\u3092\u96c6\u3081\u3066\u3044\u308b\u306e\u304b<\/h3>\n\n\n\n<p>Dask\u304c\u6ce8\u76ee\u3092\u96c6\u3081\u3066\u3044\u308b\u7406\u7531\u306f\u3001\u5927\u898f\u6a21\u30c7\u30fc\u30bf\u306e\u51e6\u7406\u306b\u304a\u3044\u3066Python\u30e6\u30fc\u30b6\u30fc\u304c\u62b1\u3048\u308b\u8ab2\u984c\u3092\u89e3\u6c7a\u3067\u304d\u308b\u304b\u3089\u3067\u3059\u3002<\/p>\n\n\n\n<p>Python\u306f\u3001\u30c7\u30fc\u30bf\u51e6\u7406\u3084\u6a5f\u68b0\u5b66\u7fd2\u306e\u5206\u91ce\u3067\u5e83\u304f\u4f7f\u308f\u308c\u3066\u3044\u307e\u3059\u304c\u3001\u30e1\u30e2\u30ea\u5236\u9650\u306b\u3088\u3063\u3066\u6271\u3048\u308b\u30c7\u30fc\u30bf\u91cf\u306b\u9650\u754c\u304c\u3042\u308a\u307e\u3057\u305f\u3002\u307e\u305f\u3001\u4e26\u5217\u51e6\u7406\u3092\u884c\u3046\u305f\u3081\u306b\u306f\u3001\u8907\u96d1\u306a\u5b9f\u88c5\u304c\u5fc5\u8981\u3067\u3042\u308a\u3001\u5c02\u9580\u7684\u306a\u77e5\u8b58\u304c\u6c42\u3081\u3089\u308c\u3066\u3044\u307e\u3057\u305f\u3002<\/p>\n\n\n\n<p>Dask\u306f\u3001\u3053\u308c\u3089\u306e\u8ab2\u984c\u3092\u89e3\u6c7a\u3059\u308b\u305f\u3081\u306b\u4f5c\u3089\u308c\u305f\u30e9\u30a4\
u30d6\u30e9\u30ea\u3067\u3059\u3002Dask\u3092\u4f7f\u3048\u3070\u3001\u5927\u898f\u6a21\u30c7\u30fc\u30bf\u3092\u30e1\u30e2\u30ea\u306b\u8f09\u305b\u308b\u3053\u3068\u306a\u304f\u51e6\u7406\u3067\u304d\u308b\u305f\u3081\u3001Python\u3067\u30d3\u30c3\u30b0\u30c7\u30fc\u30bf\u51e6\u7406\u3092\u884c\u3046\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002\u307e\u305f\u3001\u4e26\u5217\u51e6\u7406\u3082\u7c21\u5358\u306b\u5b9f\u88c5\u3067\u304d\u308b\u305f\u3081\u3001\u5c02\u9580\u77e5\u8b58\u304c\u306a\u304f\u3066\u3082\u9ad8\u901f\u306a\u51e6\u7406\u304c\u53ef\u80fd\u3067\u3059\u3002<\/p>\n\n\n\n<p>\u3055\u3089\u306b\u3001Dask\u306f\u65e2\u5b58\u306ePython\u30e9\u30a4\u30d6\u30e9\u30ea\u3068\u306e\u4e92\u63db\u6027\u304c\u9ad8\u3044\u305f\u3081\u3001NumPy\u3084Pandas\u306a\u3069\u3067\u66f8\u304b\u308c\u305f\u65e2\u5b58\u306e\u30b3\u30fc\u30c9\u3092\u308f\u305a\u304b\u306a\u4fee\u6b63\u3067\u4e26\u5217\u5316\u3067\u304d\u307e\u3059\u3002\u3053\u306e\u5229\u4fbf\u6027\u306e\u9ad8\u3055\u304c\u3001\u591a\u304f\u306ePython\u30e6\u30fc\u30b6\u30fc\u306b\u3068\u3063\u3066\u9b45\u529b\u3068\u306a\u3063\u3066\u3044\u307e\u3059\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-3\">Dask\u3092\u4f7f\u3046\u30e1\u30ea\u30c3\u30c8<\/h3>\n\n\n\n<p>Dask\u3092\u4f7f\u3046\u30e1\u30ea\u30c3\u30c8\u306f\u3001\u4ee5\u4e0b\u306e\u3088\u3046\u306b\u307e\u3068\u3081\u3089\u308c\u307e\u3059\u3002<\/p>\n\n\n\n<ol class=\"wp-block-list\">\n<li>\u5927\u898f\u6a21\u30c7\u30fc\u30bf\u306e\u51e6\u7406\u304c\u53ef\u80fd\n<ul class=\"wp-block-list\">\n<li>Out-of-core computation: \u30e1\u30e2\u30ea\u306b\u8f09\u308a\u5207\u3089\u306a\u3044\u30c7\u30fc\u30bf\u3092\u6271\u3048\u308b<\/li>\n\n\n\n<li>Lazy evaluation: \u8a08\u7b97\u306e\u5b9f\u884c\u3092\u9045\u5ef6\u3055\u305b\u3001\u5fc5\u8981\u306b\u306a\u3063\u305f\u30bf\u30a4\u30df\u30f3\u30b0\u3067\u8a55\u4fa1\u3059\u308b<\/li>\n<\/ul>\n<\/li>\n\n\n\n<li>\u7c21\u5358\u306b\u4e26\u5217\u51e6\u7406\u304c\u884c\u3048\u308b\n<ul 
class=\"wp-block-list\">\n<li>\u30bf\u30b9\u30af\u30b0\u30e9\u30d5\u306b\u3088\u308b\u81ea\u52d5\u7684\u306a\u4e26\u5217\u5316\u3068\u30b9\u30b1\u30b8\u30e5\u30fc\u30ea\u30f3\u30b0<\/li>\n\n\n\n<li>\u30de\u30eb\u30c1\u30b3\u30a2\u3084\u30de\u30eb\u30c1\u30de\u30b7\u30f3\u3067\u306e\u4e26\u5217\u51e6\u7406\u3092\u30b5\u30dd\u30fc\u30c8<\/li>\n<\/ul>\n<\/li>\n\n\n\n<li>\u65e2\u5b58\u306ePython\u30e9\u30a4\u30d6\u30e9\u30ea\u3068\u306e\u9ad8\u3044\u4e92\u63db\u6027\n<ul class=\"wp-block-list\">\n<li>NumPy\u3084Pandas\u306a\u3069\u306e\u4eba\u6c17\u30e9\u30a4\u30d6\u30e9\u30ea\u3068\u540c\u69d8\u306e\u8a18\u6cd5\u304c\u4f7f\u3048\u308b<\/li>\n\n\n\n<li>\u65e2\u5b58\u306e\u30b3\u30fc\u30c9\u3092\u5c11\u3057\u4fee\u6b63\u3059\u308b\u3060\u3051\u3067Dask\u306b\u79fb\u884c\u3067\u304d\u308b<\/li>\n<\/ul>\n<\/li>\n\n\n\n<li>\u67d4\u8edf\u6027\u304c\u9ad8\u304f\u3001\u69d8\u3005\u306a\u7528\u9014\u306b\u5229\u7528\u53ef\u80fd\n<ul class=\"wp-block-list\">\n<li>\u30c7\u30fc\u30bf\u51e6\u7406\u3001\u6a5f\u68b0\u5b66\u7fd2\u3001ETL\u30d1\u30a4\u30d7\u30e9\u30a4\u30f3\u306a\u3069\u3001\u5e45\u5e83\u3044\u5206\u91ce\u3067\u6d3b\u7528\u3067\u304d\u308b<\/li>\n<\/ul>\n<\/li>\n<\/ol>\n\n\n\n<p>\u4ee5\u4e0a\u306e\u3088\u3046\u306b\u3001Dask\u306f\u5927\u898f\u6a21\u30c7\u30fc\u30bf\u306e\u51e6\u7406\u3068\u4e26\u5217\u5316\u3092\u624b\u8efd\u306b\u5b9f\u73fe\u3067\u304d\u308b\u30e9\u30a4\u30d6\u30e9\u30ea\u3067\u3042\u308a\u3001Python\u30e6\u30fc\u30b6\u30fc\u306b\u3068\u3063\u3066\u975e\u5e38\u306b\u9b45\u529b\u7684\u306a\u30c4\u30fc\u30eb\u3068\u8a00\u3048\u307e\u3059\u3002\u6b21\u7ae0\u3067\u306f\u3001\u5b9f\u969b\u306bDask\u3092\u4f7f\u3063\u305f\u30b5\u30f3\u30d7\u30eb\u30b3\u30fc\u30c9\u3092\u898b\u3066\u3044\u304d\u307e\u3057\u3087\u3046\u3002<\/p>\n\n\n\n<h2 class=\"wp-block-heading\" 
id=\"i-4\">\u30b7\u30f3\u30d7\u30eb\u306a\u30b5\u30f3\u30d7\u30eb\u30b3\u30fc\u30c9\u3067Dask\u306e\u4f7f\u3044\u65b9\u3092\u7406\u89e3\u3057\u3088\u3046<\/h2>\n\n\n\n<p>\u305d\u308c\u3067\u306f\u3001\u5b9f\u969b\u306bDask\u3092\u4f7f\u3063\u305f\u30b5\u30f3\u30d7\u30eb\u30b3\u30fc\u30c9\u3092\u898b\u3066\u3044\u304d\u307e\u3057\u3087\u3046\u3002\u3053\u3053\u3067\u306f\u3001Dask\u306e\u4e3b\u8981\u306a\u30b3\u30f3\u30dd\u30fc\u30cd\u30f3\u30c8\u3067\u3042\u308bDask Array\u3001Dask DataFrame\u3001Dask Delayed\u3092\u4f7f\u3063\u305f\u4e26\u5217\u51e6\u7406\u306e\u4f8b\u3092\u7d39\u4ecb\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-5\">Dask\u306e\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u65b9\u6cd5<\/h3>\n\n\n\n<p>Dask\u3092\u4f7f\u3046\u306b\u306f\u3001\u307e\u305a\u4ee5\u4e0b\u306e\u3088\u3046\u306b\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u3092\u884c\u3044\u307e\u3059\u3002<\/p>\n\n\n\n<p>pip\u3092\u4f7f\u3046\u5834\u5408:<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">pip install dask<\/pre>\n\n\n\n<p>conda\u3092\u4f7f\u3046\u5834\u5408:<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">conda install -c conda-forge dask<\/pre>\n\n\n\n<p>\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u304c\u5b8c\u4e86\u3057\u305f\u3089\u3001Python\u30b9\u30af\u30ea\u30d7\u30c8\u306e\u4e2d\u3067Dask\u3092\u30a4\u30f3\u30dd\u30fc\u30c8\u3057\u3066\u4f7f\u7528\u3067\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" 
id=\"i-6\">Dask\u3092\u4f7f\u3063\u305f\u4e26\u5217\u51e6\u7406\u306e\u57fa\u672c\u7684\u306a\u66f8\u304d\u65b9<\/h3>\n\n\n\n<p>Dask\u3092\u4f7f\u3063\u305f\u4e26\u5217\u51e6\u7406\u306e\u57fa\u672c\u7684\u306a\u6d41\u308c\u306f\u3001\u4ee5\u4e0b\u306e3\u30b9\u30c6\u30c3\u30d7\u3067\u3059\u3002<\/p>\n\n\n\n<ol class=\"wp-block-list\">\n<li>\u5927\u898f\u6a21\u30c7\u30fc\u30bf\u3092\u5206\u5272\u3057\u3066\u3001Dask\u306e\u30c7\u30fc\u30bf\u69cb\u9020\uff08Dask Array\u3001Dask DataFrame\u306a\u3069\uff09\u306b\u5909\u63db\u3059\u308b<\/li>\n\n\n\n<li>\u5206\u5272\u3055\u308c\u305f\u30c7\u30fc\u30bf\u4e0a\u3067\u4e26\u5217\u306b\u8a08\u7b97\u3092\u5b9f\u884c\u3059\u308b<\/li>\n\n\n\n<li>\u8a08\u7b97\u7d50\u679c\u3092\u96c6\u7d04\u3057\u3066\u3001\u6700\u7d42\u7684\u306a\u7d50\u679c\u3092\u5f97\u308b<\/li>\n<\/ol>\n\n\n\n<p>\u3053\u308c\u3089\u306e\u30b9\u30c6\u30c3\u30d7\u3092\u5b9f\u73fe\u3059\u308b\u305f\u3081\u306b\u3001\u5404\u30b3\u30f3\u30dd\u30fc\u30cd\u30f3\u30c8\u306b\u306f<code>from_array<\/code>\u3084<code>from_pandas<\/code>\u306e\u3088\u3046\u306a\u5909\u63db\u95a2\u6570\u3001<code>compute<\/code>\u306e\u3088\u3046\u306a\u8a08\u7b97\u306e\u5b9f\u884c\u95a2\u6570\u304c\u7528\u610f\u3055\u308c\u3066\u3044\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u4ee5\u4e0b\u306f\u3001Dask Array\u3092\u4f7f\u3063\u305f\u4e26\u5217\u51e6\u7406\u306e\u4f8b\u3067\u3059\u3002<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">import numpy as np\nimport dask.array as da\n\n# NumPy\u306e\u914d\u5217\u3092\u4f5c\u6210\nnp_arr = np.random.random((1000, 1000))\n\n# NumPy\u306e\u914d\u5217\u3092Dask Array\u306b\u5909\u63db\ndask_arr = da.from_array(np_arr, chunks=(500, 500))\n\n# 
\u5206\u5272\u3055\u308c\u305f\u30c1\u30e3\u30f3\u30af\u4e0a\u3067\u4e26\u5217\u306b\u8a08\u7b97\u3092\u5b9f\u884c\nresult = dask_arr.mean(axis=0).compute()\n\nprint(result)<\/pre>\n\n\n\n<p>\u4e0a\u8a18\u306e\u30b3\u30fc\u30c9\u3067\u306f\u3001NumPy\u306e\u914d\u5217\u3092Dask Array\u306b\u5909\u63db\u3057\u3001<code>mean<\/code>\u30e1\u30bd\u30c3\u30c9\u3067\u5217\u3054\u3068\u306e\u5e73\u5747\u5024\u3092\u8a08\u7b97\u3057\u3066\u3044\u307e\u3059\u3002<code>chunks<\/code>\u5f15\u6570\u3067\u914d\u5217\u306e\u5206\u5272\u30b5\u30a4\u30ba\u3092\u6307\u5b9a\u3059\u308b\u3053\u3068\u3067\u3001\u4e26\u5217\u51e6\u7406\u304c\u884c\u308f\u308c\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u6b21\u306b\u3001Dask DataFrame\u3092\u4f7f\u3063\u305f\u4f8b\u3092\u898b\u3066\u307f\u307e\u3057\u3087\u3046\u3002<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">import pandas as pd\nimport dask.dataframe as dd\n\n# \u30b5\u30f3\u30d7\u30eb\u30c7\u30fc\u30bf\u3092\u4f5c\u6210\ndf = pd.DataFrame({'x': range(1000000), 'y': range(1000000)})\n\n# Pandas\u306e\u30c7\u30fc\u30bf\u30d5\u30ec\u30fc\u30e0\u3092Dask DataFrame\u306b\u5909\u63db\nddf = dd.from_pandas(df, npartitions=10)\n\n# \u5206\u5272\u3055\u308c\u305f\u30d1\u30fc\u30c6\u30a3\u30b7\u30e7\u30f3\u4e0a\u3067\u4e26\u5217\u306b\u8a08\u7b97\u3092\u5b9f\u884c\nresult = ddf.groupby('x').mean().compute()\n\nprint(result.head())<\/pre>\n\n\n\n<p>\u3053\u3053\u3067\u306f\u3001Pandas\u306e\u30c7\u30fc\u30bf\u30d5\u30ec\u30fc\u30e0\u3092<code>from_pandas<\/code>\u95a2\u6570\u3067Dask 
DataFrame\u306b\u5909\u63db\u3057\u3066\u3044\u307e\u3059\u3002<code>npartitions<\/code>\u5f15\u6570\u3067\u30c7\u30fc\u30bf\u30d5\u30ec\u30fc\u30e0\u306e\u5206\u5272\u6570\u3092\u6307\u5b9a\u3057\u307e\u3059\u3002\u305d\u3057\u3066\u3001<code>groupby<\/code>\u3068<code>mean<\/code>\u3092\u4f7f\u3063\u3066\u30b0\u30eb\u30fc\u30d7\u3054\u3068\u306e\u5e73\u5747\u5024\u3092\u8a08\u7b97\u3057\u3066\u3044\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u6700\u5f8c\u306b\u3001Dask Delayed\u3092\u4f7f\u3063\u305f\u4f8b\u3092\u898b\u3066\u307f\u307e\u3057\u3087\u3046\u3002<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">from dask import delayed\n\n@delayed\ndef add(x, y):\n    return x + y\n\n@delayed\ndef mul(x, y):\n    return x * y\n\n# \u9045\u5ef6\u8a55\u4fa1\u306e\u8a08\u7b97\u30b0\u30e9\u30d5\u3092\u69cb\u7bc9\nz = add(1, 2)\nw = mul(z, 3)\n\n# \u8a08\u7b97\u306e\u5b9f\u884c\nprint(w.compute())<\/pre>\n\n\n\n<p>Dask Delayed\u306f\u3001\u4efb\u610f\u306ePython\u95a2\u6570\u3092\u9045\u5ef6\u8a55\u4fa1\u5316\u3059\u308b\u305f\u3081\u306e\u30b3\u30f3\u30dd\u30fc\u30cd\u30f3\u30c8\u3067\u3059\u3002\u4e0a\u8a18\u306e\u30b3\u30fc\u30c9\u3067\u306f\u3001<code>delayed<\/code>\u30c7\u30b3\u30ec\u30fc\u30bf\u3092\u4f7f\u3063\u3066<code>add<\/code>\u95a2\u6570\u3068<code>mul<\/code>\u95a2\u6570\u3092\u9045\u5ef6\u8a55\u4fa1\u5316\u3057\u3066\u3044\u307e\u3059\u3002\u305d\u3057\u3066\u3001\u3053\u308c\u3089\u306e\u95a2\u6570\u3092\u4f7f\u3063\u3066\u8a08\u7b97\u30b0\u30e9\u30d5\u3092\u69cb\u7bc9\u3057\u3001<code>compute<\/code>\u30e1\u30bd\u30c3\u30c9\u3067\u5b9f\u969b\u306e\u8a08\u7b97\u3092\u5b9f\u884c\u3057\u3066\u3044\u307e\u3059\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" 
id=\"i-7\">Dask\u3092\u4f7f\u3046\u3053\u3068\u3067\u3069\u308c\u304f\u3089\u3044\u51e6\u7406\u304c\u901f\u304f\u306a\u308b\u306e\u304b\u691c\u8a3c<\/h3>\n\n\n\n<p>Dask\u3092\u4f7f\u3046\u3053\u3068\u3067\u3001\u51e6\u7406\u304c\u3069\u308c\u304f\u3089\u3044\u901f\u304f\u306a\u308b\u306e\u304b\u3001\u5b9f\u969b\u306b\u691c\u8a3c\u3057\u3066\u307f\u307e\u3057\u3087\u3046\u3002\u3053\u3053\u3067\u306f\u3001\u5927\u898f\u6a21\u306aCSV\u30d5\u30a1\u30a4\u30eb\u3092\u8aad\u307f\u8fbc\u3093\u3067\u3001\u4e26\u5217\u51e6\u7406\u3092\u884c\u3046\u4f8b\u3092\u898b\u3066\u307f\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">import pandas as pd\nimport dask.dataframe as dd\nimport time\n\n# \u901a\u5e38\u306ePandas\u3092\u4f7f\u3063\u305f\u51e6\u7406\nstart = time.time()\ndf = pd.read_csv('large_csv_file.csv')\nresult = df.groupby('category').mean()\nend = time.time()\nprint(f\"Pandas processing time: {end - start:.2f} seconds\")\n\n# Dask\u3092\u4f7f\u3063\u305f\u4e26\u5217\u51e6\u7406\nstart = time.time()\nddf = dd.read_csv('large_csv_file.csv')\nresult = ddf.groupby('category').mean().compute()\nend = time.time()\nprint(f\"Dask processing time: {end - start:.2f} 
seconds\")<\/pre>\n\n\n\n<p>\u4e0a\u8a18\u306e\u30b3\u30fc\u30c9\u3067\u306f\u3001\u540c\u3058CSV\u30d5\u30a1\u30a4\u30eb\u306b\u5bfe\u3057\u3066\u3001Pandas\u3092\u4f7f\u3063\u305f\u51e6\u7406\u3068Dask\u3092\u4f7f\u3063\u305f\u51e6\u7406\u3092\u884c\u3044\u3001\u305d\u308c\u305e\u308c\u306e\u51e6\u7406\u6642\u9593\u3092\u8a08\u6e2c\u3057\u3066\u3044\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u5b9f\u884c\u7d50\u679c\u306f\u3001CSV\u30d5\u30a1\u30a4\u30eb\u306e\u30b5\u30a4\u30ba\u3084\u30de\u30b7\u30f3\u306e\u30b9\u30da\u30c3\u30af\u306b\u3082\u3088\u308a\u307e\u3059\u304c\u3001Dask\u3092\u4f7f\u3063\u305f\u5834\u5408\u306f\u3001Pandas\u3092\u4f7f\u3063\u305f\u5834\u5408\u306b\u6bd4\u3079\u3066\u6570\u500d\u304b\u3089\u6570\u5341\u500d\u9ad8\u901f\u306b\u306a\u308b\u3053\u3068\u304c\u671f\u5f85\u3067\u304d\u307e\u3059\u3002\u305f\u3060\u3057\u3001\u30e1\u30e2\u30ea\u306b\u8f09\u308a\u5207\u308b\u30b5\u30a4\u30ba\u306e\u30c7\u30fc\u30bf\u3067\u306f\u3001Pandas\u306e\u65b9\u304c\u9ad8\u901f\u306a\u5834\u5408\u3082\u3042\u308a\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u3053\u306e\u3088\u3046\u306b\u3001Dask\u3092\u4f7f\u3046\u3053\u3068\u3067\u3001\u5927\u898f\u6a21\u30c7\u30fc\u30bf\u306e\u51e6\u7406\u3092\u7c21\u5358\u304b\u3064\u9ad8\u901f\u306b\u884c\u3046\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002\u6b21\u7ae0\u3067\u306f\u3001Dask\u3092\u4f7f\u3063\u305f\u4e26\u5217\u51e6\u7406\u306e\u3088\u308a\u5b9f\u8df5\u7684\u306a\u30c6\u30af\u30cb\u30c3\u30af\u3092\u7d39\u4ecb\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<h2 class=\"wp-block-heading\" id=\"i-8\">Dask\u3092\u4f7f\u3063\u305f\u4e26\u5217\u51e6\u7406\u306e\u30c6\u30af\u30cb\u30c3\u30af<\/h2>\n\n\n\n<p>Dask\u3092\u4f7f\u3063\u305f\u4e26\u5217\u51e6\u7406\u3092\u3088\u308a\u52b9\u679c\u7684\u306b\u884c\u3046\u305f\u3081\u306b\u306f\u3001\u3044\u304f\u3064\u304b\u306e\u30c6\u30af\u30cb\u30c3\u30af\u3092\u77e5\u3063\u3066\u304a\u304f\u3068\u4fbf\u5229\u3067\u3059\u3002\u3053\u3053\u3067\u306f\u3001\u30b9\u30b1\u30b8\u30e5\u30fc\u30e9\u30fc\u306e\u9078\u3073\u65b9\u3001Dask Array\u306e\u30c1\u30e3\u30f3\u30af\u30b5\u30a4\u30ba\u306e\u6700\u9069\u5316\u3001Dask 
DataFrame\u306e\u30d1\u30fc\u30c6\u30a3\u30b7\u30e7\u30f3\u6570\u306e\u6700\u9069\u5316\u3001Pandas\u3068Dask\u306e\u4f75\u7528\u306b\u3064\u3044\u3066\u89e3\u8aac\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-9\">Dask\u306e\u30b9\u30b1\u30b8\u30e5\u30fc\u30e9\u30fc\u306e\u9078\u3073\u65b9<\/h3>\n\n\n\n<p>Dask\u306b\u306f\u3001\u4ee5\u4e0b\u306e3\u7a2e\u985e\u306e\u30b9\u30b1\u30b8\u30e5\u30fc\u30e9\u30fc\u304c\u3042\u308a\u307e\u3059\u3002<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li><code>dask.threaded<\/code>: \u30b7\u30f3\u30b0\u30eb\u30de\u30b7\u30f3\u306e\u30de\u30eb\u30c1\u30b3\u30a2\u3067\u4e26\u5217\u51e6\u7406\u3092\u884c\u3046\uff08\u30c7\u30d5\u30a9\u30eb\u30c8\uff09<\/li>\n\n\n\n<li><code>dask.multiprocessing<\/code>: Python\u306e\u30de\u30eb\u30c1\u30d7\u30ed\u30bb\u30c3\u30b7\u30f3\u30b0\u3092\u4f7f\u3063\u3066\u4e26\u5217\u51e6\u7406\u3092\u884c\u3046<\/li>\n\n\n\n<li><code>dask.distributed<\/code>: \u5206\u6563\u74b0\u5883\u3067\u4e26\u5217\u51e6\u7406\u3092\u884c\u3046<\/li>\n<\/ul>\n\n\n\n<p>\u30b9\u30b1\u30b8\u30e5\u30fc\u30e9\u30fc\u306f\u3001<code>compute<\/code>\u30e1\u30bd\u30c3\u30c9\u306e<code>scheduler<\/code>\u5f15\u6570\u3067\u6307\u5b9a\u3059\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002\u4f8b\u3048\u3070\u3001<code>dask.multiprocessing<\/code>\u3092\u4f7f\u3046\u5834\u5408\u306f\u4ee5\u4e0b\u306e\u3088\u3046\u306b\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">result = 
dask_arr.mean(axis=0).compute(scheduler='multiprocessing')<\/pre>\n\n\n\n<p>\u30de\u30eb\u30c1\u30b3\u30a2CPU\u3092\u6301\u3064\u30b7\u30f3\u30b0\u30eb\u30de\u30b7\u30f3\u3067\u306f\u3001<code>dask.threaded<\/code>\u304b<code>dask.multiprocessing<\/code>\u3092\u4f7f\u3046\u306e\u304c\u826f\u3044\u3067\u3057\u3087\u3046\u3002\u8907\u6570\u306e\u30de\u30b7\u30f3\u3092\u4f7f\u3063\u305f\u5206\u6563\u51e6\u7406\u3092\u884c\u3046\u5834\u5408\u306f\u3001<code>dask.distributed<\/code>\u3092\u4f7f\u3044\u307e\u3059\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-10\">Dask Array\u306e\u30c1\u30e3\u30f3\u30af\u30b5\u30a4\u30ba\u306e\u6700\u9069\u5316<\/h3>\n\n\n\n<p>Dask Array\u306f\u3001<code>chunks<\/code>\u5f15\u6570\u3067\u6307\u5b9a\u3057\u305f\u30b5\u30a4\u30ba\u306b\u5206\u5272\u3055\u308c\u307e\u3059\u3002\u30c1\u30e3\u30f3\u30af\u30b5\u30a4\u30ba\u304c\u5c0f\u3055\u3059\u304e\u308b\u3068\u3001\u30aa\u30fc\u30d0\u30fc\u30d8\u30c3\u30c9\u304c\u5927\u304d\u304f\u306a\u308a\u3001\u30d1\u30d5\u30a9\u30fc\u30de\u30f3\u30b9\u304c\u4f4e\u4e0b\u3057\u307e\u3059\u3002\u9006\u306b\u3001\u30c1\u30e3\u30f3\u30af\u30b5\u30a4\u30ba\u304c\u5927\u304d\u3059\u304e\u308b\u3068\u3001\u4e26\u5217\u51e6\u7406\u306e\u52b9\u679c\u304c\u9650\u5b9a\u7684\u306b\u306a\u308a\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u9069\u5207\u306a\u30c1\u30e3\u30f3\u30af\u30b5\u30a4\u30ba\u306f\u3001\u30c7\u30fc\u30bf\u306e\u30b5\u30a4\u30ba\u3001\u4f7f\u7528\u53ef\u80fd\u306a\u30e1\u30e2\u30ea\u3001CPU\u30b3\u30a2\u6570\u306a\u3069\u306b\u4f9d\u5b58\u3057\u307e\u3059\u304c\u3001\u4e00\u822c\u7684\u306b\u306f\u6570\u5341MB\u301c\u6570\u767eMB\u306e\u7bc4\u56f2\u3067\u8a2d\u5b9a\u3059\u308b\u306e\u304c\u826f\u3044\u3067\u3057\u3087\u3046\u3002<\/p>\n\n\n\n<p>\u4ee5\u4e0b\u306f\u3001\u30c1\u30e3\u30f3\u30af\u30b5\u30a4\u30ba\u3092\u5909\u3048\u3066Dask 
Array\u3092\u4f5c\u6210\u3057\u3001\u8a08\u7b97\u306e\u5b9f\u884c\u6642\u9593\u3092\u6bd4\u8f03\u3059\u308b\u30b5\u30f3\u30d7\u30eb\u30b3\u30fc\u30c9\u3067\u3059\u3002<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">import numpy as np\nimport dask.array as da\nimport time\n\n# \u5927\u898f\u6a21\u306aNumPy\u306e\u914d\u5217\u3092\u4f5c\u6210\nnp_arr = np.random.random((10000, 10000))\n\n# \u30c1\u30e3\u30f3\u30af\u30b5\u30a4\u30ba\u3092\u5909\u3048\u3066Dask Array\u306b\u5909\u63db\ndask_arr_1 = da.from_array(np_arr, chunks=(1000, 1000))\ndask_arr_2 = da.from_array(np_arr, chunks=(5000, 5000))\n\n# \u8a08\u7b97\u306e\u5b9f\u884c\u6642\u9593\u3092\u6bd4\u8f03\nstart = time.time()\nresult_1 = dask_arr_1.mean(axis=0).compute()\nend = time.time()\nprint(f\"Chunk size (1000, 1000): {end - start:.2f} seconds\")\n\nstart = time.time()\nresult_2 = dask_arr_2.mean(axis=0).compute()\nend = time.time()\nprint(f\"Chunk size (5000, 5000): {end - start:.2f} seconds\")<\/pre>\n\n\n\n<p>\u5b9f\u884c\u7d50\u679c\u3092\u898b\u308b\u3068\u3001\u30c1\u30e3\u30f3\u30af\u30b5\u30a4\u30ba\u306b\u3088\u3063\u3066\u8a08\u7b97\u6642\u9593\u304c\u7570\u306a\u308b\u3053\u3068\u304c\u308f\u304b\u308a\u307e\u3059\u3002\u6700\u9069\u306a\u30c1\u30e3\u30f3\u30af\u30b5\u30a4\u30ba\u306f\u3001\u5b9f\u969b\u306e\u30c7\u30fc\u30bf\u3084\u74b0\u5883\u306b\u5408\u308f\u305b\u3066\u8abf\u6574\u3057\u3066\u3044\u304f\u5fc5\u8981\u304c\u3042\u308a\u307e\u3059\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-11\">Dask DataFrame\u306e\u30d1\u30fc\u30c6\u30a3\u30b7\u30e7\u30f3\u6570\u306e\u6700\u9069\u5316<\/h3>\n\n\n\n<p>Dask 
DataFrame\u306f\u3001<code>npartitions<\/code>\u5f15\u6570\u3067\u6307\u5b9a\u3057\u305f\u6570\u306b\u5206\u5272\u3055\u308c\u307e\u3059\u3002\u30d1\u30fc\u30c6\u30a3\u30b7\u30e7\u30f3\u6570\u304c\u5c11\u306a\u3059\u304e\u308b\u3068\u3001\u4e26\u5217\u51e6\u7406\u306e\u52b9\u679c\u304c\u9650\u5b9a\u7684\u306b\u306a\u308a\u307e\u3059\u3002\u9006\u306b\u3001\u30d1\u30fc\u30c6\u30a3\u30b7\u30e7\u30f3\u6570\u304c\u591a\u3059\u304e\u308b\u3068\u3001\u30aa\u30fc\u30d0\u30fc\u30d8\u30c3\u30c9\u304c\u5927\u304d\u304f\u306a\u308a\u3001\u30d1\u30d5\u30a9\u30fc\u30de\u30f3\u30b9\u304c\u4f4e\u4e0b\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u9069\u5207\u306a\u30d1\u30fc\u30c6\u30a3\u30b7\u30e7\u30f3\u6570\u306f\u3001\u30c7\u30fc\u30bf\u306e\u30b5\u30a4\u30ba\u3001\u4f7f\u7528\u53ef\u80fd\u306a\u30e1\u30e2\u30ea\u3001CPU\u30b3\u30a2\u6570\u306a\u3069\u306b\u4f9d\u5b58\u3057\u307e\u3059\u304c\u3001\u4e00\u822c\u7684\u306b\u306fCPU\u30b3\u30a2\u6570\u306e\u6570\u500d\u7a0b\u5ea6\u306b\u8a2d\u5b9a\u3059\u308b\u306e\u304c\u826f\u3044\u3067\u3057\u3087\u3046\u3002<\/p>\n\n\n\n<p>\u4ee5\u4e0b\u306f\u3001\u30d1\u30fc\u30c6\u30a3\u30b7\u30e7\u30f3\u6570\u3092\u5909\u3048\u3066Dask DataFrame\u3092\u4f5c\u6210\u3057\u3001\u8a08\u7b97\u306e\u5b9f\u884c\u6642\u9593\u3092\u6bd4\u8f03\u3059\u308b\u30b5\u30f3\u30d7\u30eb\u30b3\u30fc\u30c9\u3067\u3059\u3002<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">import pandas as pd\nimport dask.dataframe as dd\nimport time\n\n# \u5927\u898f\u6a21\u306a\u30c7\u30fc\u30bf\u3092\u8aad\u307f\u8fbc\u3080\ndf = pd.read_csv('large_csv_file.csv')\n\n# \u30d1\u30fc\u30c6\u30a3\u30b7\u30e7\u30f3\u6570\u3092\u5909\u3048\u3066Dask DataFrame\u306b\u5909\u63db\nddf_1 = dd.from_pandas(df, npartitions=4)\nddf_2 = dd.from_pandas(df, npartitions=16)\n\n# 
\u8a08\u7b97\u306e\u5b9f\u884c\u6642\u9593\u3092\u6bd4\u8f03\nstart = time.time()\nresult_1 = ddf_1.groupby('category').mean().compute()\nend = time.time()\nprint(f\"Number of partitions (4): {end - start:.2f} seconds\")\n\nstart = time.time()\nresult_2 = ddf_2.groupby('category').mean().compute()\nend = time.time()\nprint(f\"Number of partitions (16): {end - start:.2f} seconds\")<\/pre>\n\n\n\n<p>\u5b9f\u884c\u7d50\u679c\u3092\u898b\u308b\u3068\u3001\u30d1\u30fc\u30c6\u30a3\u30b7\u30e7\u30f3\u6570\u306b\u3088\u3063\u3066\u8a08\u7b97\u6642\u9593\u304c\u7570\u306a\u308b\u3053\u3068\u304c\u308f\u304b\u308a\u307e\u3059\u3002\u6700\u9069\u306a\u30d1\u30fc\u30c6\u30a3\u30b7\u30e7\u30f3\u6570\u306f\u3001\u5b9f\u969b\u306e\u30c7\u30fc\u30bf\u3084\u74b0\u5883\u306b\u5408\u308f\u305b\u3066\u8abf\u6574\u3057\u3066\u3044\u304f\u5fc5\u8981\u304c\u3042\u308a\u307e\u3059\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-12\">Pandas\u3068Dask\u3092\u4f75\u7528\u3059\u308b\u65b9\u6cd5<\/h3>\n\n\n\n<p>Dask\u306f\u3001Pandas\u3068\u306e\u4e92\u63db\u6027\u304c\u9ad8\u3044\u305f\u3081\u3001\u4e21\u8005\u3092\u4f75\u7528\u3059\u308b\u3053\u3068\u3067\u67d4\u8edf\u306a\u51e6\u7406\u304c\u53ef\u80fd\u306b\u306a\u308a\u307e\u3059\u3002\u4ee5\u4e0b\u306f\u3001Pandas\u3068Dask\u3092\u4f75\u7528\u3059\u308b\u30b5\u30f3\u30d7\u30eb\u30b3\u30fc\u30c9\u3067\u3059\u3002<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">import pandas as pd\nimport dask.dataframe as dd\n\n# \u5927\u898f\u6a21\u306a\u30c7\u30fc\u30bf\u3092\u8aad\u307f\u8fbc\u3080\ndf = pd.read_csv('large_csv_file.csv')\n\n# \u4e00\u90e8\u306e\u51e6\u7406\u3092Dask\u3067\u884c\u3046\nddf = dd.from_pandas(df, npartitions=10)\nresult = ddf[ddf['value'] &gt; 100]\n\n# 
\u7d50\u679c\u3092Pandas\u306e\u30c7\u30fc\u30bf\u30d5\u30ec\u30fc\u30e0\u306b\u5909\u63db\nresult_df = result.compute()<\/pre>\n\n\n\n<p>\u4e0a\u8a18\u306e\u30b3\u30fc\u30c9\u3067\u306f\u3001Pandas\u3067\u5927\u898f\u6a21\u306a\u30c7\u30fc\u30bf\u3092\u8aad\u307f\u8fbc\u307f\u3001\u4e00\u90e8\u306e\u51e6\u7406\u3092Dask\u3067\u884c\u3063\u3066\u3044\u307e\u3059\u3002\u305d\u306e\u5f8c\u3001\u7d50\u679c\u3092Pandas\u306e\u30c7\u30fc\u30bf\u30d5\u30ec\u30fc\u30e0\u306b\u5909\u63db\u3057\u3066\u3044\u307e\u3059\u3002\u3053\u306e\u3088\u3046\u306b\u3001Pandas\u3068Dask\u3092\u4f75\u7528\u3059\u308b\u3053\u3068\u3067\u3001\u305d\u308c\u305e\u308c\u306e\u9577\u6240\u3092\u6d3b\u304b\u3057\u305f\u51e6\u7406\u304c\u53ef\u80fd\u306b\u306a\u308a\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u4ee5\u4e0a\u306e\u3088\u3046\u306b\u3001Dask\u3092\u4f7f\u3063\u305f\u4e26\u5217\u51e6\u7406\u3067\u306f\u3001\u30b9\u30b1\u30b8\u30e5\u30fc\u30e9\u30fc\u306e\u9078\u629e\u3001\u30c1\u30e3\u30f3\u30af\u30b5\u30a4\u30ba\u3084\u30d1\u30fc\u30c6\u30a3\u30b7\u30e7\u30f3\u6570\u306e\u6700\u9069\u5316\u3001Pandas\u3068\u306e\u4f75\u7528\u306a\u3069\u3001\u69d8\u3005\u306a\u30c6\u30af\u30cb\u30c3\u30af\u3092\u99c6\u4f7f\u3059\u308b\u3053\u3068\u3067\u3001\u9ad8\u3044\u30d1\u30d5\u30a9\u30fc\u30de\u30f3\u30b9\u3092\u5f15\u304d\u51fa\u3059\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002\u5b9f\u969b\u306e\u30c7\u30fc\u30bf\u3084\u74b0\u5883\u306b\u5408\u308f\u305b\u3066\u3001\u3053\u308c\u3089\u306e\u30c6\u30af\u30cb\u30c3\u30af\u3092\u9069\u7528\u3057\u3066\u3044\u304f\u3053\u3068\u304c\u91cd\u8981\u3067\u3059\u3002<\/p>\n\n\n\n<h2 class=\"wp-block-heading\" 
id=\"i-13\">Dask\u306e\u5b9f\u8df5\u7684\u306a\u30e6\u30fc\u30b9\u30b1\u30fc\u30b9\u3068\u5fdc\u7528\u4f8b<\/h2>\n\n\n\n<p>Dask\u306f\u3001\u69d8\u3005\u306a\u5b9f\u8df5\u7684\u306a\u30e6\u30fc\u30b9\u30b1\u30fc\u30b9\u3067\u6d3b\u7528\u3059\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002\u3053\u3053\u3067\u306f\u3001\u6a5f\u68b0\u5b66\u7fd2\u3001ETL\u30d1\u30a4\u30d7\u30e9\u30a4\u30f3\u3001\u30af\u30e9\u30a6\u30c9\u74b0\u5883\u3067\u306e\u5229\u7528\u306b\u3064\u3044\u3066\u3001\u5177\u4f53\u7684\u306a\u5fdc\u7528\u4f8b\u3092\u898b\u3066\u3044\u304d\u307e\u3057\u3087\u3046\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-14\">\u6a5f\u68b0\u5b66\u7fd2\u3067\u306eDask\u306e\u6d3b\u7528\u65b9\u6cd5<\/h3>\n\n\n\n<p>Dask\u3092\u4f7f\u3046\u3068\u3001scikit-learn\u306e\u30e2\u30c7\u30eb\u3092\u4e26\u5217\u5316\u3057\u305f\u308a\u3001\u5927\u898f\u6a21\u30c7\u30fc\u30bf\u306b\u5bfe\u5fdc\u3057\u305f\u5206\u6563\u6a5f\u68b0\u5b66\u7fd2\u3092\u5b9f\u73fe\u3057\u305f\u308a\u3059\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<p>scikit-learn\u306e\u30e2\u30c7\u30eb\u3092Dask\u3067\u4e26\u5217\u5316\u3059\u308b\u306b\u306f\u3001<code>dask_ml.wrappers<\/code>\u3092\u4f7f\u3063\u3066\u30e2\u30c7\u30eb\u3092\u30e9\u30c3\u30d7\u3057\u307e\u3059\u3002\u4ee5\u4e0b\u306f\u3001RandomForestClassifier\u3092\u4e26\u5217\u5316\u3059\u308b\u30b5\u30f3\u30d7\u30eb\u30b3\u30fc\u30c9\u3067\u3059\u3002<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">from sklearn.datasets import load_iris\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.ensemble import RandomForestClassifier\nfrom dask_ml.wrappers import ParallelPostFit\n\n# \u30c7\u30fc\u30bf\u306e\u8aad\u307f\u8fbc\u307f\u3068\u5206\u5272\nX, y = load_iris(return_X_y=True)\nX_train, X_test, 
y_train, y_test = train_test_split(X, y, random_state=0)\n\n# scikit-learn\u306e\u30e2\u30c7\u30eb\u3092\u4f5c\u6210\nclf = RandomForestClassifier(n_estimators=100, random_state=0)\n\n# Dask\u3067\u30e2\u30c7\u30eb\u3092\u30e9\u30c3\u30d7\nclf_wrapped = ParallelPostFit(clf)\n\n# \u30e2\u30c7\u30eb\u306e\u5b66\u7fd2\u3068\u4e88\u6e2c\nclf_wrapped.fit(X_train, y_train)\ny_pred = clf_wrapped.predict(X_test)\n\n# \u8a55\u4fa1\nfrom sklearn.metrics import accuracy_score\nprint(f\"Accuracy: {accuracy_score(y_test, y_pred):.2f}\")<\/pre>\n\n\n\n<p>\u307e\u305f\u3001Dask-ML\u3092\u4f7f\u3048\u3070\u3001\u5927\u898f\u6a21\u30c7\u30fc\u30bf\u306b\u5bfe\u5fdc\u3057\u305f\u5206\u6563\u6a5f\u68b0\u5b66\u7fd2\u3092\u5b9f\u73fe\u3067\u304d\u307e\u3059\u3002Dask-ML\u306f\u3001Dask\u3092\u4f7f\u3063\u305f\u6a5f\u68b0\u5b66\u7fd2\u30e9\u30a4\u30d6\u30e9\u30ea\u3067\u3001\u5927\u898f\u6a21\u30c7\u30fc\u30bf\u306b\u5bfe\u5fdc\u3057\u305f\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0\u3092\u63d0\u4f9b\u3057\u3066\u3044\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u3055\u3089\u306b\u3001Dask-XGBoost\u3092\u4f7f\u3048\u3070\u3001\u52fe\u914d\u30d6\u30fc\u30b9\u30c6\u30a3\u30f3\u30b0\u306e\u4e26\u5217\u5316\u3082\u53ef\u80fd\u3067\u3059\u3002Dask-XGBoost\u306f\u3001XGBoost\u306eDask\u7248\u3067\u3001\u5927\u898f\u6a21\u30c7\u30fc\u30bf\u306b\u5bfe\u5fdc\u3057\u305f\u52fe\u914d\u30d6\u30fc\u30b9\u30c6\u30a3\u30f3\u30b0\u3092\u5b9f\u73fe\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-15\">Dask\u3092\u4f7f\u3063\u305fETL\u30d1\u30a4\u30d7\u30e9\u30a4\u30f3\u306e\u69cb\u7bc9<\/h3>\n\n\n\n<p>Dask\u3092\u4f7f\u3046\u3068\u3001\u5927\u898f\u6a21\u30c7\u30fc\u30bf\u306eETL\uff08Extract, Transform, Load\uff09\u30d1\u30a4\u30d7\u30e9\u30a4\u30f3\u3092\u52b9\u7387\u7684\u306b\u69cb\u7bc9\u3067\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<p>Dask 
DataFrame\u3092\u4f7f\u3048\u3070\u3001CSV\u3084Parquet\u3001HDF5\u306a\u3069\u3001\u69d8\u3005\u306a\u30d5\u30a9\u30fc\u30de\u30c3\u30c8\u306e\u30c7\u30fc\u30bf\u3092\u8aad\u307f\u8fbc\u307f\u3001\u6b20\u640d\u5024\u306e\u51e6\u7406\u3084\u7279\u5fb4\u91cf\u30a8\u30f3\u30b8\u30cb\u30a2\u30ea\u30f3\u30b0\u3092\u4e26\u5217\u306b\u5b9f\u884c\u3067\u304d\u307e\u3059\u3002\u4ee5\u4e0b\u306f\u3001Dask DataFrame\u3092\u4f7f\u3063\u305fETL\u30d1\u30a4\u30d7\u30e9\u30a4\u30f3\u306e\u30b5\u30f3\u30d7\u30eb\u30b3\u30fc\u30c9\u3067\u3059\u3002<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">import dask.dataframe as dd\n\n# \u30c7\u30fc\u30bf\u306e\u8aad\u307f\u8fbc\u307f\nddf = dd.read_csv('large_csv_file.csv')\n\n# \u6b20\u640d\u5024\u306e\u51e6\u7406\nddf = ddf.fillna(0)\n\n# \u7279\u5fb4\u91cf\u30a8\u30f3\u30b8\u30cb\u30a2\u30ea\u30f3\u30b0\nddf['new_feature'] = ddf['feature_1'] + ddf['feature_2']\n\n# \u30d5\u30a3\u30eb\u30bf\u30ea\u30f3\u30b0\nddf = ddf[ddf['value'] &gt; 100]\n\n# \u96c6\u8a08\nresult = ddf.groupby('category').mean().compute()\n\nprint(result)<\/pre>\n\n\n\n<p>\u307e\u305f\u3001Dask Bag\u3092\u4f7f\u3048\u3070\u3001\u5927\u898f\u6a21\u306a\u30c6\u30ad\u30b9\u30c8\u30c7\u30fc\u30bf\u306e\u51e6\u7406\u306b\u9069\u3057\u305fETL\u30d1\u30a4\u30d7\u30e9\u30a4\u30f3\u3092\u69cb\u7bc9\u3067\u304d\u307e\u3059\u3002\u6b63\u898f\u8868\u73fe\u3092\u4f7f\u3063\u305f\u6587\u5b57\u5217\u306e\u51e6\u7406\u3092\u4e26\u5217\u306b\u5b9f\u884c\u3067\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u3055\u3089\u306b\u3001Dask 
Delayed\u3092\u4f7f\u3048\u3070\u3001\u8907\u6570\u306e\u30bf\u30b9\u30af\u3092\u7d44\u307f\u5408\u308f\u305b\u3066ETL\u30d1\u30a4\u30d7\u30e9\u30a4\u30f3\u3092\u69cb\u7bc9\u3067\u304d\u307e\u3059\u3002\u30bf\u30b9\u30af\u306e\u4f9d\u5b58\u95a2\u4fc2\u3092\u5b9a\u7fa9\u3057\u3001\u52b9\u7387\u7684\u306a\u51e6\u7406\u3092\u5b9f\u73fe\u3067\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-16\">Dask\u3092\u30af\u30e9\u30a6\u30c9\u74b0\u5883\u3067\u5229\u7528\u3059\u308b\u65b9\u6cd5<\/h3>\n\n\n\n<p>Dask\u306f\u3001\u30af\u30e9\u30a6\u30c9\u74b0\u5883\u3067\u3082\u5229\u7528\u3059\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002Amazon Web Services\uff08AWS\uff09\u3001Google Cloud Platform\uff08GCP\uff09\u3001Microsoft Azure\u306a\u3069\u306e\u4e3b\u8981\u306a\u30af\u30e9\u30a6\u30c9\u30d7\u30e9\u30c3\u30c8\u30d5\u30a9\u30fc\u30e0\u3067\u3001Dask\u3092\u6d3b\u7528\u3067\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<p>AWS\u3067Dask\u3092\u5229\u7528\u3059\u308b\u5834\u5408\u3001AWS EC2\u3067Dask\u30af\u30e9\u30b9\u30bf\u30fc\u3092\u69cb\u7bc9\u3057\u3001AWS S3\u304b\u3089\u30c7\u30fc\u30bf\u3092\u8aad\u307f\u8fbc\u3080\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002\u540c\u69d8\u306b\u3001GCP\u3067\u306fCompute Engine\u3067Dask\u30af\u30e9\u30b9\u30bf\u30fc\u3092\u69cb\u7bc9\u3057\u3001Cloud Storage\u304b\u3089\u30c7\u30fc\u30bf\u3092\u8aad\u307f\u8fbc\u3081\u307e\u3059\u3002Azure\u3067\u306f\u3001Virtual Machines\u3067Dask\u30af\u30e9\u30b9\u30bf\u30fc\u3092\u69cb\u7bc9\u3057\u3001Blob Storage\u304b\u3089\u30c7\u30fc\u30bf\u3092\u8aad\u307f\u8fbc\u3081\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u4ee5\u4e0b\u306f\u3001AWS\u3067Dask\u30af\u30e9\u30b9\u30bf\u30fc\u3092\u69cb\u7bc9\u3057\u3001S3\u304b\u3089\u30c7\u30fc\u30bf\u3092\u8aad\u307f\u8fbc\u3080\u30b5\u30f3\u30d7\u30eb\u30b3\u30fc\u30c9\u3067\u3059\u3002<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" 
data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">from dask.distributed import Client\nfrom dask_cloudprovider.aws import FargateCluster\nimport dask.dataframe as dd\n\n# Dask\u30af\u30e9\u30b9\u30bf\u30fc\u306e\u4f5c\u6210\ncluster = FargateCluster(n_workers=10)\nclient = Client(cluster)\n\n# S3\u304b\u3089\u30c7\u30fc\u30bf\u3092\u8aad\u307f\u8fbc\u3080\nddf = dd.read_csv('s3:\/\/bucket\/large_csv_file.csv')\n\n# \u8a08\u7b97\u306e\u5b9f\u884c\nresult = ddf.groupby('category').mean().compute()\n\nprint(result)<\/pre>\n\n\n\n<p>\u4e0a\u8a18\u306e\u30b3\u30fc\u30c9\u3067\u306f\u3001AWS Fargate\u3067Dask\u30af\u30e9\u30b9\u30bf\u30fc\u3092\u4f5c\u6210\u3057\u3001S3\u304b\u3089CSV\u30d5\u30a1\u30a4\u30eb\u3092\u8aad\u307f\u8fbc\u3093\u3067\u3001\u8a08\u7b97\u3092\u5b9f\u884c\u3057\u3066\u3044\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u3053\u306e\u3088\u3046\u306b\u3001Dask\u3092\u30af\u30e9\u30a6\u30c9\u74b0\u5883\u3067\u5229\u7528\u3059\u308b\u3053\u3068\u3067\u3001\u30aa\u30f3\u30d7\u30ec\u30df\u30b9\u306e\u74b0\u5883\u3067\u306f\u6271\u3048\u306a\u3044\u3088\u3046\u306a\u5927\u898f\u6a21\u30c7\u30fc\u30bf\u306e\u51e6\u7406\u3092\u3001\u67d4\u8edf\u304b\u3064\u52b9\u7387\u7684\u306b\u884c\u3046\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u4ee5\u4e0a\u306e\u3088\u3046\u306b\u3001Dask\u306f\u6a5f\u68b0\u5b66\u7fd2\u3001ETL\u30d1\u30a4\u30d7\u30e9\u30a4\u30f3\u3001\u30af\u30e9\u30a6\u30c9\u74b0\u5883\u3067\u306e\u5229\u7528\u306a\u3069\u3001\u69d8\u3005\u306a\u5b9f\u8df5\u7684\u306a\u30e6\u30fc\u30b9\u30b1\u30fc\u30b9\u3067\u6d3b\u7528\u3059\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002Dask\u306e\u7279\u5fb4\u3092\u6d3b\u304b\u3059\u3053\u3068\u3067\u3001\u5927\u898f\u6a21\u30c7\u30fc\u30bf\u306e\u51e6\u7406\u3092\u52b9\u7387\u7684\u306b\u884c\u3044\u3001\u30d3\u30b8\u30cd\u30b9\u306e\u8ab2\u984c\u89e3\u6c7a\u306b\u5f79\u7acb\u3066\u308b\u3053\u3068\u304c\u3067\u304d\u308b\u3067\u3057\u308
7\u3046\u3002<\/p>\n\n\n\n<h2 class=\"wp-block-heading\" id=\"i-17\">\u307e\u3068\u3081\uff1aPython\u30a8\u30f3\u30b8\u30cb\u30a2\u5fc5\u643a\u306e\u30e9\u30a4\u30d6\u30e9\u30ea\u300cDask\u300d\u3092\u4f7f\u3044\u3053\u306a\u305d\u3046\uff01<\/h2>\n\n\n\n<p>\u672c\u8a18\u4e8b\u3067\u306f\u3001Python\u3067\u5927\u898f\u6a21\u30c7\u30fc\u30bf\u306e\u4e26\u5217\u51e6\u7406\u3092\u884c\u3046\u305f\u3081\u306e\u30e9\u30a4\u30d6\u30e9\u30ea\u300cDask\u300d\u306b\u3064\u3044\u3066\u8a73\u3057\u304f\u89e3\u8aac\u3057\u3066\u304d\u307e\u3057\u305f\u3002\u3053\u3053\u3067\u306f\u3001Python\u30a8\u30f3\u30b8\u30cb\u30a2\u304c\u3069\u306e\u3088\u3046\u306a\u72b6\u6cc1\u3067Dask\u3092\u4f7f\u3046\u3079\u304d\u304b\u3001Dask\u3092\u4f7f\u3046\u3053\u3068\u3067\u5f97\u3089\u308c\u308b\u30e1\u30ea\u30c3\u30c8\u3001\u6ce8\u610f\u70b9\u306a\u3069\u3092\u307e\u3068\u3081\u307e\u3059\u3002<\/p>\n\n\n\n<p>Dask\u3092\u4f7f\u3046\u3079\u304d\u72b6\u6cc1\u306f\u3001\u4ee5\u4e0b\u306e\u3088\u3046\u306a\u30b1\u30fc\u30b9\u304c\u6319\u3052\u3089\u308c\u307e\u3059\u3002<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u6570\u767eGB\u301c\u6570TB\u898f\u6a21\u306e\u5927\u898f\u6a21\u30c7\u30fc\u30bf\u3092\u6271\u3046\u5fc5\u8981\u304c\u3042\u308b\u5834\u5408<\/li>\n\n\n\n<li>\u30b7\u30f3\u30b0\u30eb\u30de\u30b7\u30f3\u3067\u306e\u51e6\u7406\u3067\u306f\u6642\u9593\u304c\u304b\u304b\u308a\u3059\u304e\u308b\u5834\u5408<\/li>\n\n\n\n<li>\u30de\u30eb\u30c1\u30b3\u30a2CPU\u3084\u30af\u30e9\u30b9\u30bf\u30fc\u3092\u6d3b\u7528\u3057\u3066\u4e26\u5217\u51e6\u7406\u3092\u884c\u3044\u305f\u3044\u5834\u5408<\/li>\n\n\n\n<li>Python\u3084NumPy\u3001Pandas\u306a\u3069\u306e\u77e5\u8b58\u3092\u6d3b\u304b\u3057\u3064\u3064\u3001\u4e26\u5217\u51e6\u7406\u3092\u5b9f\u73fe\u3057\u305f\u3044\u5834\u5408<\/li>\n<\/ul>\n\n\n\n<p>Dask\u3092\u4f7f\u3046\u3053\u3068\u3067\u3001\u4ee5\u4e0b\u306e\u3088\u3046\u306a\u30e1\u30ea\u30c3\u30c8\u304c\u5f97\u3089\u308c\u307e\u3059\u3002<\/p>\n\n\n\n<ul 
class=\"wp-block-list\">\n<li>Out-of-core computation\u306b\u3088\u308a\u3001\u30e1\u30e2\u30ea\u306b\u8f09\u308a\u5207\u3089\u306a\u3044\u5927\u898f\u6a21\u30c7\u30fc\u30bf\u3092\u6271\u3048\u308b<\/li>\n\n\n\n<li>\u30de\u30eb\u30c1\u30b3\u30a2CPU\u3084\u30af\u30e9\u30b9\u30bf\u30fc\u3092\u6d3b\u7528\u3057\u305f\u4e26\u5217\u51e6\u7406\u306b\u3088\u308a\u3001\u51e6\u7406\u6642\u9593\u3092\u5927\u5e45\u306b\u77ed\u7e2e\u3067\u304d\u308b<\/li>\n\n\n\n<li>NumPy\u3084Pandas\u306a\u3069\u306e\u4eba\u6c17\u30e9\u30a4\u30d6\u30e9\u30ea\u3068\u540c\u69d8\u306e\u8a18\u6cd5\u304c\u4f7f\u3048\u3001Python\u306e\u751f\u7523\u6027\u3092\u7dad\u6301\u3067\u304d\u308b<\/li>\n<\/ul>\n\n\n\n<p>\u305f\u3060\u3057\u3001Dask\u3092\u4f7f\u3046\u4e0a\u3067\u306f\u4ee5\u4e0b\u306e\u3088\u3046\u306a\u70b9\u306b\u6ce8\u610f\u304c\u5fc5\u8981\u3067\u3059\u3002<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u5927\u898f\u6a21\u30c7\u30fc\u30bf\u3092\u6271\u3046\u5834\u5408\u3001\u30e1\u30e2\u30ea\u4f7f\u7528\u91cf\u304c\u554f\u984c\u306b\u306a\u308b\u3053\u3068\u304c\u3042\u308b\u305f\u3081\u3001\u30c1\u30e3\u30f3\u30af\u30b5\u30a4\u30ba\u3084\u30d1\u30fc\u30c6\u30a3\u30b7\u30e7\u30f3\u6570\u306e\u8abf\u6574\u304c\u91cd\u8981<\/li>\n\n\n\n<li>\u30b7\u30f3\u30b0\u30eb\u30de\u30b7\u30f3\u304b\u30af\u30e9\u30b9\u30bf\u30fc\u304b\u3001\u30c7\u30fc\u30bf\u30b5\u30a4\u30ba\u3084\u30bf\u30b9\u30af\u306e\u7279\u6027\u306b\u5fdc\u3058\u3066\u9069\u5207\u306a\u30b9\u30b1\u30b8\u30e5\u30fc\u30e9\u30fc\u3092\u9078\u3076\u5fc5\u8981\u304c\u3042\u308b<\/li>\n\n\n\n<li>\u4e26\u5217\u51e6\u7406\u3092\u884c\u3046\u305f\u3081\u3001\u30c7\u30d0\u30c3\u30b0\u3084\u30a8\u30e9\u30fc\u30cf\u30f3\u30c9\u30ea\u30f3\u30b0\u304c\u96e3\u3057\u304f\u306a\u308b\u3053\u3068\u304c\u3042\u308b<\/li>\n<\/ul>\n\n\n\n<p>\u307e\u305f\u3001Dask\u306f\u6a5f\u68b0\u5b66\u7fd2\u3001XGBoost\u3001\u5730\u7406\u7a7a\u9593\u30c7\u30fc\u30bf\u51e6\u7406\u3001\u753b\u50cf\u51e6\u7406\u306a\u3069\u3001\u69d8\u3005\u306a\u5206\u91ce\u306b\u7
279\u5316\u3057\u305f\u30a8\u30b3\u30b7\u30b9\u30c6\u30e0\u3092\u6301\u3063\u3066\u3044\u307e\u3059\u3002<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li>Dask-ML: Dask\u3092\u4f7f\u3063\u305f\u6a5f\u68b0\u5b66\u7fd2\u30e9\u30a4\u30d6\u30e9\u30ea<\/li>\n\n\n\n<li>Dask-XGBoost: XGBoost\u306eDask\u7248<\/li>\n\n\n\n<li>Dask-GeoPandas: \u5730\u7406\u7a7a\u9593\u30c7\u30fc\u30bf\u51e6\u7406\u306e\u305f\u3081\u306eDask\u30b3\u30ec\u30af\u30b7\u30e7\u30f3<\/li>\n\n\n\n<li>Dask-Image: \u753b\u50cf\u51e6\u7406\u306e\u305f\u3081\u306eDask\u30b3\u30ec\u30af\u30b7\u30e7\u30f3<\/li>\n<\/ul>\n\n\n\n<p>\u3053\u308c\u3089\u306e\u30e9\u30a4\u30d6\u30e9\u30ea\u3092\u6d3b\u7528\u3059\u308b\u3053\u3068\u3067\u3001\u5404\u5206\u91ce\u306e\u554f\u984c\u3092Dask\u3067\u52b9\u7387\u7684\u306b\u89e3\u6c7a\u3059\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u4ee5\u4e0b\u306f\u3001Dask-ML\u3092\u4f7f\u3063\u305f\u5206\u6563\u578b\u306e\u6a5f\u68b0\u5b66\u7fd2\u306e\u30b5\u30f3\u30d7\u30eb\u30b3\u30fc\u30c9\u3067\u3059\u3002<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">from dask.distributed import Client\nfrom sklearn.datasets import make_classification\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.metrics import accuracy_score\nfrom dask_ml.wrappers import ParallelPostFit\nfrom sklearn.linear_model import LogisticRegression\n\n# Dask\u30af\u30e9\u30b9\u30bf\u30fc\u306b\u63a5\u7d9a\nclient = Client('scheduler-address:8786')\n\n# \u30b5\u30f3\u30d7\u30eb\u30c7\u30fc\u30bf\u306e\u751f\u6210\nX, y = make_classification(n_samples=100000, n_features=20, random_state=42)\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n\n# scikit-learn\u306e\u30e2\u30c7\u30eb\u3092\u4f5c\u6210\nlr = LogisticRegression()\n\n# 
Dask-ML\u3067\u30e2\u30c7\u30eb\u3092\u30e9\u30c3\u30d7\nlr_wrapped = ParallelPostFit(lr)\n\n# \u30e2\u30c7\u30eb\u306e\u5b66\u7fd2\u3068\u4e88\u6e2c\nlr_wrapped.fit(X_train, y_train)\ny_pred = lr_wrapped.predict(X_test)\n\n# \u8a55\u4fa1\naccuracy = accuracy_score(y_test, y_pred)\nprint(f'Accuracy: {accuracy:.3f}')<\/pre>\n\n\n\n<p>\u4e0a\u8a18\u306e\u30b3\u30fc\u30c9\u3067\u306f\u3001Dask-ML\u306eParallelPostFit\u3067scikit-learn\u306eLogisticRegression\u30e2\u30c7\u30eb\u3092\u30e9\u30c3\u30d7\u3057\u3001\u7cbe\u5ea6\u3092\u8a55\u4fa1\u3057\u3066\u3044\u307e\u3059\u3002\u306a\u304a\u3001ParallelPostFit\u304c\u4e26\u5217\u5316\u3059\u308b\u306e\u306f\u5b66\u7fd2\u5f8c\u306e\u4e88\u6e2c\uff08predict\u306a\u3069\uff09\u3067\u3042\u308a\u3001\u5b66\u7fd2\uff08fit\uff09\u305d\u306e\u3082\u306e\u306f\u5206\u6563\u3055\u308c\u307e\u305b\u3093\u3002Dask\u30af\u30e9\u30b9\u30bf\u30fc\u3092\u4f7f\u3046\u3053\u3068\u3067\u3001\u5927\u898f\u6a21\u30c7\u30fc\u30bf\u306b\u5bfe\u3057\u3066\u3082\u52b9\u7387\u7684\u306b\u6a5f\u68b0\u5b66\u7fd2\u3092\u884c\u3046\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<p>Dask\u306f\u3001\u5927\u898f\u6a21\u30c7\u30fc\u30bf\u306e\u4e26\u5217\u51e6\u7406\u3092\u624b\u8efd\u306b\u5b9f\u73fe\u3067\u304d\u308b\u5f37\u529b\u306a\u30c4\u30fc\u30eb\u3067\u3059\u3002Python\u30a8\u30f3\u30b8\u30cb\u30a2\u304cDask\u3092\u4f7f\u3044\u3053\u306a\u3059\u3053\u3068\u3067\u3001\u30c7\u30fc\u30bf\u30b5\u30a4\u30ba\u304c\u5927\u304d\u304f\u306a\u3063\u3066\u3082\u751f\u7523\u6027\u3092\u7dad\u6301\u3057\u3064\u3064\u3001\u9ad8\u901f\u306a\u51e6\u7406\u3092\u5b9f\u73fe\u3067\u304d\u308b\u3067\u3057\u3087\u3046\u3002\u305c\u3072Dask\u3092\u6d3b\u7528\u3057\u3066\u3001Python\u3067\u306e\u30c7\u30fc\u30bf\u51e6\u7406\u3084\u6a5f\u68b0\u5b66\u7fd2\u306e\u30d1\u30d5\u30a9\u30fc\u30de\u30f3\u30b9\u3092\u5411\u4e0a\u3055\u305b\u3066\u304f\u3060\u3055\u3044\uff01<\/p>\n","protected":false},"excerpt":{"rendered":"<p>Python\u3067\u30c7\u30fc\u30bf\u51e6\u7406\u3084\u6a5f\u68b0\u5b66\u7fd2\u3092\u884c\u3046\u969b\u3001\u5927\u898f\u6a21\u30c7\u30fc\u30bf\u3092\u9ad8\u901f\u306b\u51e6\u7406\u3059\u308b\u5fc5\u8981\u306b\u8feb\u3089\u308c\u308b\u3053\u3068\u304c\u3042\u308a\u307e\u3059\u3002\u305d\u3093\u306a\u6642\u3001\u4e26
\u5217\u51e6\u7406\u30e9\u30a4\u30d6\u30e9\u30ea\u300cDask\u300d\u304c\u5f37\u529b\u306a\u52a9\u3063\u4eba\u3068\u306a\u308a\u307e\u3059\u3002\u672c\u8a18\u4e8b\u3067\u306f\u3001Dask\u306e\u57fa\u672c\u7684\u306a\u4f7f\u3044\u65b9\u304b\u3089\u5b9f\u8df5\u7684\u306a\u30c6\u30af &#8230; <\/p>\n","protected":false},"author":1,"featured_media":686,"comment_status":"closed","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[4],"tags":[],"class_list":{"0":"post-685","1":"post","2":"type-post","3":"status-publish","4":"format-standard","5":"has-post-thumbnail","7":"category-python"},"_links":{"self":[{"href":"https:\/\/chocottopro.com\/index.php?rest_route=\/wp\/v2\/posts\/685","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/chocottopro.com\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/chocottopro.com\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/chocottopro.com\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/chocottopro.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=685"}],"version-history":[{"count":1,"href":"https:\/\/chocottopro.com\/index.php?rest_route=\/wp\/v2\/posts\/685\/revisions"}],"predecessor-version":[{"id":687,"href":"https:\/\/chocottopro.com\/index.php?rest_route=\/wp\/v2\/posts\/685\/revisions\/687"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/chocottopro.com\/index.php?rest_route=\/wp\/v2\/media\/686"}],"wp:attachment":[{"href":"https:\/\/chocottopro.com\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=685"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/chocottopro.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=685"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/chocottopro.com\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=685"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":tru
e}]}}