リスト内のリストが同等であるかどうかを確認するより速い方法はありますか？

Question

ここでは、4つの異なるパーティション、つまり{1}、{2、3、4}、{5、6}、および{7}に整数_1:7_があり、それらのパーティションはリストに書き込まれます（つまり、list(1,c(2,3,4),c(5,6),7)。 1つのパーティション内の要素の異なる順列が同じものとして認識されるように、パーティションをセットとして扱います。たとえば、list(1,c(2,3,4),c(5,6),7)とlist(7,1,c(2,3,4),c(6,5))は同等です。

この問題は上の排他的パーティションについて説明しているため、リストの要素には繰り返しがないので、たとえばlist(c(1,2),c(2,1),c(1,2))がないことに注意してください。セット全体。

以下のように、いくつかの異なる順列をリストlstにリストしました

_lst <- list(list(1,c(2,3,4),c(5,6),7), list(c(2,3,4),1,7,c(5,6)), list(1,c(2,3,4),7,c(6,5)), list(7,1,c(3,2,4),c(5,6))) _

そして、私がしたいことは、すべての順列が同等であることを確認することです。はいの場合、結果はTRUEになります。

これまでに行ったのは、各パーティション内の要素を並べ替え、setdiff()をinterset()およびunion()とともに使用して判断しました（以下のコードを参照）

_s <- Map(function(v) Map(sort,v),lst) equivalent <- length(setdiff(Reduce(union,s),Reduce(intersect,s),))==0 _

ただし、パーティションサイズが大きくなると、この方法は遅くなると思います。それを作るためのより速いアプローチはありますか？事前に感謝！

いくつかのテストケース（小さなサイズのデータ）

_# should return `TRUE` lst1 <- list(list(1,c(2,3,4),c(5,6)), list(c(2,3,4),1,c(5,6)), list(1,c(2,3,4),c(6,5))) # should return `TRUE` lst2 <- list(list(1:2, 3:4), list(3:4, 1:2)) # should return `FALSE` lst3 <- list(list(1,c(2,3,4),c(5,6)), list(c(2,3,4),1,c(5,6)), list(1,c(2,3,5),c(6,4))) _

Joseph Wood · Accepted Answer

Rおよびfastのバリアントに関する投稿は、 rcpp を特徴とするソリューション。

効率を最大化するには、正しいデータ構造を選択することが最も重要です。私たちのデータ構造は一意の値を格納する必要があり、高速な挿入/アクセスも必要です。これがまさに std :: unordered_set が具体化するものです。順序付けられていないvectorの各integersを一意に識別する方法を決定する必要があるだけです。

算術の基本定理を入力します

FTAは、すべての数は素数の積によって一意に（因子のorderまで）表すことができると述べています。

以下は、FTAを使用して2つのベクトルが次数と等しいかどうかをすばやく解読する方法を示す例です（以下のN.B. Pは素数のリストです... _(2, 3, 5, 7, 11, etc.)_：

_ Maps to Maps to product vec1 = (1, 2, 7) -->> P[1], P[2], P[7] --->> 2, 3, 17 -->> 102 vec2 = (7, 3, 1) -->> P[7], P[3], P[1] --->> 17, 5, 2 -->> 170 vec3 = (2, 7, 1) -->> P[2], P[7], P[1] --->> 3, 17, 2 -->> 102 _

このことから、_vec1_と_vec3_は同じ番号に正しくマッピングされるのに対し、_vec2_は別の値にマッピングされることがわかります。

実際のベクトルには1000未満の100までの整数が含まれる可能性があるため、FTAを適用すると非常に大きな数が生成されます。これを回避するには、対数の積ルールを利用します。

ログ_b（xy）=ログ_b（x）+ログ_b（y）

これで自由に使えるので、もっと大きな数の例に取り組むことができます（これは非常に大きな例で悪化し始めます）。

まず、単純な素数ジェネレーターが必要です（NB。実際には各素数のログを生成しています）。

_#include <Rcpp.h> using namespace Rcpp; // [[Rcpp::plugins(cpp11)]] void getNPrimes(std::vector<double> &logPrimes) { const int n = logPrimes.size(); const int limit = static_cast<int>(2.0 * static_cast<double>(n) * std::log(n)); std::vector<bool> sieve(limit + 1, true); int lastP = 3; const int fsqr = std::sqrt(static_cast<double>(limit)); while (lastP <= fsqr) { for (int j = lastP * lastP; j <= limit; j += 2 * lastP) sieve[j] = false; int ind = 2; for (int k = lastP + 2; !sieve[k]; k += 2) ind += 2; lastP += ind; } logPrimes[0] = std::log(2.0); for (int i = 3, j = 1; i <= limit && j < n; i += 2) if (sieve[i]) logPrimes[j++] = std::log(static_cast<double>(i)); } _

そして、これが主な実装です：

_// [[Rcpp::export]] bool f_Rcpp_Hash(List x) { List tempLst = x[0]; const int n = tempLst.length(); int myMax = 0; // Find the max so we know how many primes to generate for (int i = 0; i < n; ++i) { IntegerVector v = tempLst[i]; const int tempMax = *std::max_element(v.cbegin(), v.cend()); if (tempMax > myMax) myMax = tempMax; } std::vector<double> logPrimes(myMax + 1, 0.0); getNPrimes(logPrimes); double sumMax = 0.0; for (int i = 0; i < n; ++i) { IntegerVector v = tempLst[i]; double mySum = 0.0; for (auto j: v) mySum += logPrimes[j]; if (mySum > sumMax) sumMax = mySum; } // Since all of the sums will be double values and we want to // ensure that they are compared with scrutiny, we multiply // each sum by a very large integer to bring the decimals to // the right of the zero and then convert them to an integer. // E.g. Using the example above v1 = (1, 2, 7) & v2 = (7, 3, 1) // // sum of log of primes for v1 = log(2) + log(3) + log(17) // ~= 4.62497281328427 // // sum of log of primes for v2 = log(17) + log(5) + log(2) // ~= 5.13579843705026 // // multiplier = floor(.Machine$integer.max / 5.13579843705026) // [1] 418140173 // // Now, we multiply each sum and convert to an integer // // as.integer(4.62497281328427 * 418140173) // [1] 1933886932 <<-- This is the key for v1 // // as.integer(5.13579843705026 * 418140173) // [1] 2147483646 <<-- This is the key for v2 const uint64_t multiplier = std::numeric_limits<int>::max() / sumMax; std::unordered_set<uint64_t> Canon; Canon.reserve(n); for (int i = 0; i < n; ++i) { IntegerVector v = tempLst[i]; double mySum = 0.0; for (auto j: v) mySum += logPrimes[j]; Canon.insert(static_cast<uint64_t>(multiplier * mySum)); } const auto myEnd = Canon.end(); for (auto it = x.begin() + 1; it != x.end(); ++it) { List tempLst = *it; if (tempLst.length() != n) return false; for (int j = 0; j < n; ++j) { IntegerVector v = tempLst[j]; double mySum = 0.0; for (auto k: v) mySum += logPrimes[k]; const uint64_t key = static_cast<uint64_t>(multiplier * mySum); if (Canon.find(key) == myEnd) return false; } } return true; } _

@GKiによって与えられたlst1, lst2, lst3, & lst (the large one)に適用したときの結果は次のとおりです。

_f_Rcpp_Hash(lst) [1] TRUE f_Rcpp_Hash(lst1) [1] TRUE f_Rcpp_Hash(lst2) [1] FALSE f_Rcpp_Hash(lst3) [1] FALSE _

次に、unitsパラメータをrelativeに設定したベンチマークをいくつか示します。

_microbenchmark(check = 'equal', times = 10 , unit = "relative" , f_ThomsIsCoding(lst3) , f_chinsoon12(lst3) , f_GKi_6a(lst3) , f_GKi_6b(lst3) , f_Rcpp_Hash(lst3)) Unit: relative expr min lq mean median uq max neval f_ThomsIsCoding(lst3) 84.882393 63.541468 55.741646 57.894564 56.732118 33.142979 10 f_chinsoon12(lst3) 31.984571 24.320220 22.148787 22.393368 23.599284 15.211029 10 f_GKi_6a(lst3) 7.207269 5.978577 5.431342 5.761809 5.852944 3.439283 10 f_GKi_6b(lst3) 7.399280 5.751190 6.350720 5.484894 5.893290 8.035091 10 f_Rcpp_Hash(lst3) 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 10 microbenchmark(check = 'equal', times = 10 , unit = "relative" , f_ThomsIsCoding(lst) , f_chinsoon12(lst) , f_GKi_6a(lst) , f_GKi_6b(lst) , f_Rcpp_Hash(lst)) Unit: relative expr min lq mean median uq max neval f_ThomsIsCoding(lst) 199.776328 202.318938 142.909407 209.422530 91.753335 85.090838 10 f_chinsoon12(lst) 9.542780 8.983248 6.755171 9.766027 4.903246 3.834358 10 f_GKi_6a(lst) 3.169508 3.158366 2.555443 3.731292 1.902140 1.649982 10 f_GKi_6b(lst) 2.992992 2.943981 2.019393 3.046393 1.315166 1.069585 10 f_Rcpp_Hash(lst) 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 10 _

より大きな例では、最速のソリューションよりも3倍高速です。

これは何を意味するのでしょうか？

私にとって、この結果は、@ GKi、@ chinsoon12、@ Gregor、@ ThomasIsCodingなどによって表示される_base R_の美しさと効率性を物語っています。適度な速度を得るために、非常に具体的な_C++_を約100行書きました。公平を期すために、_base R_ソリューションは、ほとんどがコンパイル済みのコードを呼び出し、上記のようにハッシュテーブルを利用することになります。

GKi · Answer

並べ替え後、duplicatedおよびallを使用できます。

s <- lapply(lst, function(x) lapply(x, sort)) #Sort vectors s <- lapply(s, function(x) x[order(vapply(x, "[", 1, 1))]) #Sort lists all(duplicated(s)[-1]) #Test if there are all identical #length(unique(s)) == 1 #Alternative way to test if all are identical

代替案：1つのループで並べ替え

s <- lapply(lst, function(x) { tt <- lapply(x, sort) tt[order(vapply(tt, "[", 1, 1))] }) all(duplicated(s)[-1])

代替：ループ中にソートして早期終了を許可する

s <- lapply(lst[[1]], sort) s <- s[order(vapply(s, "[", 1, 1))] tt <- TRUE for(i in seq(lst)[-1]) { x <- lapply(lst[[i]], sort) x <- x[order(vapply(x, "[", 1, 1))] if(!identical(s, x)) { tt <- FALSE break; } } tt

またはsetequalを使用

s <- lapply(lst[[1]], sort) tt <- TRUE for(i in seq(lst)[-1]) { x <- lapply(lst[[i]], sort) if(!setequal(s, x)) { tt <- FALSE break; } } tt

または @ chinsoon12 のアイデアを少し改善して、リストをベクトルと交換します！

s <- lst[[1]][order(vapply(lst[[1]], min, 1))] s <- rep(seq_along(s), lengths(s))[order(unlist(s))] tt <- TRUE for(i in seq(lst)[-1]) { x <- lst[[i]][order(vapply(lst[[i]], min, 1))] x <- rep(seq_along(x), lengths(x))[order(unlist(x))] if(!identical(s, x)) {tt <- FALSE; break;} } tt

または2番目のorderを避けます

s <- lst[[1]][order(vapply(lst[[1]], min, 1))] s <- rep(seq_along(s), lengths(s))[order(unlist(s))] y <- s tt <- TRUE for(i in seq(lst)[-1]) { x <- lst[[i]][order(vapply(lst[[i]], min, 1))] y <- y[0] y[unlist(x)] <- rep(seq_along(x), lengths(x)) if(!identical(s, y)) {tt <- FALSE; break;} } tt

またはorderをmatch（またはfmatch）と交換します

x <- lst[[1]] s <- "[<-"(integer(),unlist(x),rep(seq_along(x), lengths(x))) s <- match(s, unique(s)) tt <- TRUE for(i in seq(lst)[-1]) { x <- lst[[i]] y <- "[<-"(integer(),unlist(x),rep(seq_along(x), lengths(x))) y <- match(y, unique(y)) if(!identical(s, y)) {tt <- FALSE; break;} } tt

または早期終了なし。

s <- lapply(lst, function(x) { y <- "[<-"(integer(),unlist(x),rep(seq_along(x), lengths(x))) match(y, unique(y)) }) all(duplicated(s)[-1])

またはC++で書かれた

sourceCpp(code = "#include <Rcpp.h> #include <vector> using namespace Rcpp; // [[Rcpp::plugins(cpp11)]] // [[Rcpp::export]] bool f_GKi_6_Rcpp(const List &x) { const List &x0 = x[0]; const unsigned int n = x0.length(); unsigned int nn = 0; for (List const &i : x0) {nn += i.length();} std::vector<int> s(nn); for (unsigned int i=0; i<n; ++i) { const IntegerVector &v = x0[i]; for (int const &j : v) { if(j > nn) return false; s[j-1] = i; } } { std::vector<int> lup(n, -1); int j = 0; for(int &i : s) { if(lup[i] < 0) {lup[i] = j++;} i = lup[i]; } } for (List const &i : x) { if(i.length() != n) return false; std::vector<int> sx(nn); for(unsigned int j=0; j<n; ++j) { const IntegerVector &v = i[j]; for (int const &k : v) { if(k > nn) return false; sx[k-1] = j; } } { std::vector<int> lup(n, -1); int j = 0; for(int &i : sx) { int &lupp = lup[i]; if(lupp == -1) {lupp = j; i = j++; } else {i = lupp;} } } if(s!=sx) return false; } return true; } ")

回答を改善するためのヒントを@Gregorに感謝します！

GKi · Answer

パフォーマンス：

library(microbenchmark) microbenchmark(check = 'equal', times=10 , f_ThomsIsCoding(lst1) , f_chinsoon12(lst1) , f_GKi_6a(lst1) , f_GKi_6b(lst1) , f_GKi_6_Rcpp(lst1) , f_Rcpp_Hash(lst1)) #Unit: microseconds # expr min lq mean median uq max neval # f_ThomsIsCoding(lst1) 161187.790 162453.520 167107.5739 167899.471 169441.028 174746.156 10 # f_chinsoon12(lst1) 64380.792 64938.528 66983.9449 67357.924 68487.438 69201.032 10 # f_GKi_6a(lst1) 8833.595 9201.744 10377.5844 9407.864 12145.926 14662.022 10 # f_GKi_6b(lst1) 8815.592 8913.950 9877.4948 9112.924 10941.261 12553.845 10 # f_GKi_6_Rcpp(lst1) 394.754 426.489 539.1494 439.644 451.375 1327.885 10 # f_Rcpp_Hash(lst1) 327.665 374.409 499.4080 398.101 495.034 1198.674 10 microbenchmark(check = 'equal', times=10 , f_ThomsIsCoding(lst2) , f_chinsoon12(lst2) , f_GKi_6a(lst2) , f_GKi_6b(lst2) , f_GKi_6_Rcpp(lst2) , f_Rcpp_Hash(lst2)) #Unit: microseconds # expr min lq mean median uq max neval # f_ThomsIsCoding(lst2) 93808.603 99663.651 103358.2039 104676.1600 107124.879 107485.696 10 # f_chinsoon12(lst2) 131.320 147.192 192.5354 188.1935 205.053 337.062 10 # f_GKi_6a(lst2) 8630.970 9554.279 10681.9510 9753.2670 11970.377 13489.243 10 # f_GKi_6b(lst2) 39.736 47.916 61.3929 52.7755 63.026 110.808 10 # f_GKi_6_Rcpp(lst2) 43.017 51.022 72.8736 76.3465 86.527 116.060 10 # f_Rcpp_Hash(lst2) 3.667 4.237 20.5887 16.3000 18.031 96.728 10 microbenchmark(check = 'equal', times=10 , f_ThomsIsCoding(lst3) , f_chinsoon12(lst3) , f_GKi_6a(lst3) , f_GKi_6b(lst3) , f_GKi_6_Rcpp(lst3) , f_Rcpp_Hash(lst3)) #Unit: microseconds # expr min lq mean median uq max neval # f_ThomsIsCoding(lst3) 157660.501 166914.782 167067.2512 167204.9065 168055.941 177153.694 10 # f_chinsoon12(lst3) 139.157 181.019 183.9257 188.0950 198.249 211.860 10 # f_GKi_6a(lst3) 9484.496 9617.471 10709.3950 10056.1865 11812.037 12830.560 10 # f_GKi_6b(lst3) 33.583 36.338 47.1577 42.6540 63.469 66.640 10 # f_GKi_6_Rcpp(lst3) 60.010 60.455 89.4963 94.7220 104.271 121.431 10 # f_Rcpp_Hash(lst3) 4.404 5.518 9.9811 6.5115 17.396 20.090 10 microbenchmark(check = 'equal', times=10 , f_ThomsIsCoding(lst4) , f_chinsoon12(lst4) , f_GKi_6a(lst4) , f_GKi_6b(lst4) , f_GKi_6_Rcpp(lst4) , f_Rcpp_Hash(lst4)) #Unit: milliseconds # expr min lq mean median uq max neval # f_ThomsIsCoding(lst4) 1874.129146 1937.643431 2012.99077 2002.460746 2134.072981 2187.46886 10 # f_chinsoon12(lst4) 69.949917 74.393779 80.25362 76.595763 87.116571 100.57917 10 # f_GKi_6a(lst4) 23.259178 23.328548 27.62690 28.856612 30.675259 32.57509 10 # f_GKi_6b(lst4) 22.200969 22.326122 24.20769 23.023687 23.619360 31.74266 10 # f_GKi_6_Rcpp(lst4) 8.062451 8.228526 10.30559 8.363314 13.425531 13.80677 10 # f_Rcpp_Hash(lst4) 6.551370 6.586025 7.22958 6.724232 6.809745 11.97631 10

ライブラリ：

system.time(install.packages("Rcpp")) # User System verstrichen # 27.576 1.147 29.396 system.time(library(Rcpp)) # User System verstrichen # 0.070 0.000 0.071

機能：

system.time({f_ThomsIsCoding <- function(lst) { s <- Map(function(v) Map(sort,v),lst) length(setdiff(Reduce(union,s),Reduce(intersect,s)))==0 }}) # User System verstrichen # 0 0 0 #like GKi's solution to stop early when diff is detected system.time({f_chinsoon12 <- function(lst) { x <- lst[[1L]] y <- x[order(lengths(x), sapply(x, min))] a <- rep(seq_along(y), lengths(y))[order(unlist(y))] for(x in lst[-1L]) { y <- x[order(lengths(x), sapply(x, min))] a2 <- rep(seq_along(y), lengths(y))[order(unlist(y))] if(!identical(a, a2)) { return(FALSE) } } TRUE }}) # User System verstrichen # 0 0 0 system.time({f_GKi_6a <- function(lst) { all(duplicated(lapply(lst, function(x) { y <- "[<-"(integer(),unlist(x),rep(seq_along(x), lengths(x))) match(y, unique(y)) }))[-1]) }}) # User System verstrichen # 0 0 0 system.time({f_GKi_6b <- function(lst) { x <- lst[[1]] s <- "[<-"(integer(),unlist(x),rep(seq_along(x), lengths(x))) s <- match(s, unique(s)) for(i in seq(lst)[-1]) { x <- lst[[i]] y <- "[<-"(integer(),unlist(x),rep(seq_along(x), lengths(x))) y <- match(y, unique(y)) if(!identical(s, y)) return(FALSE) } TRUE }}) # User System verstrichen # 0 0 0 system.time({sourceCpp(code = "#include <Rcpp.h> #include <vector> using namespace Rcpp; // [[Rcpp::plugins(cpp11)]] // [[Rcpp::export]] bool f_GKi_6_Rcpp(const List &x) { const List &x0 = x[0]; const unsigned int n = x0.length(); unsigned int nn = 0; for (List const &i : x0) {nn += i.length();} std::vector<int> s(nn); for (unsigned int i=0; i<n; ++i) { const IntegerVector &v = x0[i]; for (int const &j : v) { if(j > nn) return false; s[j-1] = i; } } { std::vector<int> lup(n, -1); int j = 0; for(int &i : s) { if(lup[i] < 0) {lup[i] = j++;} i = lup[i]; } } for (List const &i : x) { if(i.length() != n) return false; std::vector<int> sx(nn); for(unsigned int j=0; j<n; ++j) { const IntegerVector &v = i[j]; for (int const &k : v) { if(k > nn) return false; sx[k-1] = j; } } { std::vector<int> lup(n, -1); int j = 0; for(int &i : sx) { int &lupp = lup[i]; if(lupp == -1) {lupp = j; i = j++; } else {i = lupp;} } } if(s!=sx) return false; } return true; } ")}) # User System verstrichen # 3.265 0.217 3.481 system.time({sourceCpp(code = "#include <Rcpp.h> using namespace Rcpp; // [[Rcpp::plugins(cpp11)]] void getNPrimes(std::vector<double> &logPrimes) { const int n = logPrimes.size(); const int limit = static_cast<int>(2.0 * static_cast<double>(n) * std::log(n)); std::vector<bool> sieve(limit + 1, true); int lastP = 3; const int fsqr = std::sqrt(static_cast<double>(limit)); while (lastP <= fsqr) { for (int j = lastP * lastP; j <= limit; j += 2 * lastP) sieve[j] = false; int ind = 2; for (int k = lastP + 2; !sieve[k]; k += 2) ind += 2; lastP += ind; } logPrimes[0] = std::log(2.0); for (int i = 3, j = 1; i <= limit && j < n; i += 2) if (sieve[i]) logPrimes[j++] = std::log(static_cast<double>(i)); } // [[Rcpp::export]] bool f_Rcpp_Hash(List x) { List tempLst = x[0]; const int n = tempLst.length(); int myMax = 0; // Find the max so we know how many primes to generate for (int i = 0; i < n; ++i) { IntegerVector v = tempLst[i]; const int tempMax = *std::max_element(v.cbegin(), v.cend()); if (tempMax > myMax) myMax = tempMax; } std::vector<double> logPrimes(myMax + 1, 0.0); getNPrimes(logPrimes); double sumMax = 0.0; for (int i = 0; i < n; ++i) { IntegerVector v = tempLst[i]; double mySum = 0.0; for (auto j: v) mySum += logPrimes[j]; if (mySum > sumMax) sumMax = mySum; } const uint64_t multiplier = std::numeric_limits<int>::max() / sumMax; std::unordered_set<uint64_t> Canon; Canon.reserve(n); for (int i = 0; i < n; ++i) { IntegerVector v = tempLst[i]; double mySum = 0.0; for (auto j: v) mySum += logPrimes[j]; Canon.insert(static_cast<uint64_t>(multiplier * mySum)); } const auto myEnd = Canon.end(); for (auto it = x.begin() + 1; it != x.end(); ++it) { List tempLst = *it; if (tempLst.length() != n) return false; for (int j = 0; j < n; ++j) { IntegerVector v = tempLst[j]; double mySum = 0.0; for (auto k: v) mySum += logPrimes[k]; const uint64_t key = static_cast<uint64_t>(multiplier * mySum); if (Canon.find(key) == myEnd) return false; } } return true; } ")}) # User System verstrichen # 3.507 0.155 3.662

データ：

lst1 <- list(list(1,c(2,3,4),c(5,6)) #TRUE , list(c(2,3,4),1,c(5,6)) , list(1,c(2,3,4),c(6,5))) lst2 <- list(list(c(2,3,4),c(1,5,6)) #FALSE , list(c(2,3,6),c(1,5,4)) , list(c(2,3,4),c(1,5,6))) lst3 <- list(list(1,c(2,3,4),c(5,6)) #FALSE , list(c(2,3,4),1,c(5,6)) , list(1,c(2,3,5),c(6,4))) set.seed(7) N <- 1e3 lst1 <- lst1[sample(seq(lst1), N, TRUE)] lst2 <- lst2[sample(seq(lst2), N, TRUE)] lst3 <- lst3[sample(seq(lst3), N, TRUE)] N <- 1000 M <- 500 l <- unname(split(1:N,findInterval(1:N,sort(sample(1:N,N/10)),left.open = T))) lst4 <- lapply(lapply(1:M, function(k) lapply(l, function(v) v[sample(seq_along(v),length(v))])), function(x) x[sample(seq_along(x),length(x))])

chinsoon12 · Answer

うまくいけば2回目はラッキー

f <- function(lst) { s <- lapply(lst, function(x) { y <- x[order(lengths(x), sapply(x, min))] rep(seq_along(y), lengths(y))[order(unlist(y))] }) length(unique(s))==1L }

テストケース：

# should return `TRUE` lst1 <- list(list(1,c(2,3,4),c(5,6)), list(c(2,3,4),1,c(5,6)), list(1,c(2,3,4),c(6,5))) # should return `TRUE` lst2 <- list(list(1:2, 3:4), list(3:4, 1:2)) # should return `FALSE` lst3 <- list(list(1,c(2,3,4),c(5,6)), list(c(2,3,4),1,c(5,6)), list(1,c(2,3,5),c(6,4))) # should return `FALSE` lst4 <- list(list(c(2,3,4),c(1,5,6)), list(c(2,3,6),c(1,5,4)), list(c(2,3,4),c(1,5,6))) lst5 <- list(list(1,c(2,3,4),c(5,6)) #TRUE , list(c(2,3,4),1,c(5,6)) , list(1,c(2,3,4),c(6,5))) lst6 <- list(list(c(2,3,4),c(1,5,6)) #FALSE , list(c(2,3,6),c(1,5,4)) , list(c(2,3,4),c(1,5,6))) lst7 <- list(list(1,c(2,3,4),c(5,6)) #FALSE , list(c(2,3,4),1,c(5,6)) , list(1,c(2,3,5),c(6,4)))

チェック：

f(lst1) #[1] TRUE f(lst2) #[1] TRUE f(lst3) #[1] FALSE f(lst4) #[1] FALSE f(lst5) #[1] TRUE f(lst6) #[1] FALSE f(lst7) #[1] FALSE

タイミングコード：

library(microbenchmark) set.seed(0L) N <- 1000 M <- 100 l <- unname(split(1:N,findInterval(1:N,sort(sample(1:N,N/10)),left.open = T))) lst <- lapply(lapply(1:M, function(k) lapply(l, function(v) v[sample(seq_along(v),length(v))])), function(x) x[sample(seq_along(x),length(x))]) f_ThomsIsCoding <- function(lst) { s <- Map(function(v) Map(sort,v),lst) length(setdiff(Reduce(union,s),Reduce(intersect,s)))==0 } f_GKi_1 <- function(lst) { all(duplicated(lapply(lst, function(x) lapply(x, sort)[order(unlist(lapply(x, min)))]))[-1]) } f_GKi_2 <- function(lst) { s <- lapply(lst, function(x) lapply(x, sort)) all(duplicated(lapply(s, function(x) x[order(unlist(lapply(x, "[", 1)))]))[-1]) } f <- function(lst) { s <- lapply(lst, function(x) { y <- x[order(lengths(x), sapply(x, min))] rep(seq_along(y), lengths(y))[order(unlist(y))] }) length(unique(s))==1L } microbenchmark(times=3L, f_ThomsIsCoding(lst), f_GKi_1(lst), f_GKi_2(lst), f(lst) )

タイミング：

Unit: milliseconds expr min lq mean median uq max neval f_ThomsIsCoding(lst) 333.77313 334.61662 348.37474 335.46010 355.67555 375.8910 3 f_GKi_1(lst) 324.12827 324.66580 326.33016 325.20332 327.43111 329.6589 3 f_GKi_2(lst) 315.73533 316.05770 333.35910 316.38007 342.17099 367.9619 3 f(lst) 12.42986 14.08256 15.74231 15.73526 17.39853 19.0618 3

リスト内のリストが同等であるかどうかを確認するより速い方法はありますか？

算術の基本定理 を入力します

算術の基本定理を入力します