🤖 Auto-format code [skip ci]
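Every hunk in this commit adds the strict= keyword that zip() gained in Python 3.10 (PEP 618). strict=False keeps the old truncating behaviour explicit, while strict=True raises on length mismatch. A minimal sketch of the difference (not part of the diff):

# Minimal sketch of the zip() keyword added throughout this commit.
# Requires Python 3.10+ (PEP 618).
a = [1, 2, 3]
b = ["x", "y"]

# strict=False keeps the old behaviour: stop at the shorter iterable.
print(list(zip(a, b, strict=False)))  # [(1, 'x'), (2, 'y')]

# strict=True raises ValueError when the lengths differ.
try:
    list(zip(a, b, strict=True))
except ValueError as exc:
    print(exc)  # zip() argument 2 is shorter than argument 1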
@@ -58,7 +58,9 @@ def hash_deduplicate(
     # 保存去重后的三元组
     new_triple_list_data = {}
 
-    for _, (raw_paragraph, triple_list) in enumerate(zip(raw_paragraphs.values(), triple_list_data.values())):
+    for _, (raw_paragraph, triple_list) in enumerate(
+        zip(raw_paragraphs.values(), triple_list_data.values(), strict=False)
+    ):
         # 段落hash
         paragraph_hash = get_sha256(raw_paragraph)
         if f"{PG_NAMESPACE}-{paragraph_hash}" in stored_pg_hashes and paragraph_hash in stored_paragraph_hashes:
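The deduplication above keys stored paragraphs by a SHA-256 digest obtained from get_sha256. That helper is not shown in this diff; a plausible hashlib-based sketch, purely for illustration:

# Hypothetical sketch of a get_sha256 helper; the project's real implementation
# is not part of this commit.
import hashlib

def get_sha256(text: str) -> str:
    # Hex digest of the UTF-8 encoded paragraph, used as a stable dedup key.
    return hashlib.sha256(text.encode("utf-8")).hexdigest()

print(get_sha256("示例段落"))  # 64-character hex string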
@@ -174,7 +174,7 @@ def main(): # sourcery skip: comprehension-to-generator, extract-method
     with ThreadPoolExecutor(max_workers=workers) as executor:
         future_to_hash = {
             executor.submit(process_single_text, pg_hash, raw_data, llm_client_list): pg_hash
-            for pg_hash, raw_data in zip(all_sha256_list, all_raw_datas)
+            for pg_hash, raw_data in zip(all_sha256_list, all_raw_datas, strict=False)
         }
 
         with Progress(
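The future_to_hash mapping built above is the usual submit-then-collect pattern for ThreadPoolExecutor. A self-contained sketch of how such a mapping is typically drained with as_completed (square() and the job keys are invented for the example, not taken from this repository):

# Illustrative sketch of the submit/collect pattern.
from concurrent.futures import ThreadPoolExecutor, as_completed

def square(x: int) -> int:
    return x * x

with ThreadPoolExecutor(max_workers=4) as executor:
    future_to_key = {executor.submit(square, n): f"job-{n}" for n in range(5)}
    for future in as_completed(future_to_key):
        key = future_to_key[future]
        print(key, future.result())  # results arrive in completion order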
@@ -354,7 +354,7 @@ class VirtualLogDisplay:
 
         # 为每个部分应用正确的标签
         current_len = 0
-        for part, tag_name in zip(parts, tags):
+        for part, tag_name in zip(parts, tags, strict=False):
             start_index = f"{start_pos}+{current_len}c"
             end_index = f"{start_pos}+{current_len + len(part)}c"
             self.text_widget.tag_add(tag_name, start_index, end_index)
@@ -119,7 +119,7 @@ class ExpressionLearner:
         min_len = min(len(s1), len(s2))
         if min_len < 5:
             return False
-        same = sum(1 for a, b in zip(s1, s2) if a == b)
+        same = sum(1 for a, b in zip(s1, s2, strict=False) if a == b)
         return same / min_len > 0.8
 
     async def learn_and_store_expression(self) -> List[Tuple[str, str, str]]:
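The ExpressionLearner hunk compares two strings position by position and treats them as near-duplicates when both are at least 5 characters long and more than 80% of the aligned characters match. A small worked example of that heuristic, copied standalone for illustration:

# Worked example of the positional-match heuristic above; threshold from the hunk.
s1, s2 = "hello world", "hello_world"
min_len = min(len(s1), len(s2))
same = sum(1 for a, b in zip(s1, s2, strict=False) if a == b)
print(same, min_len, same / min_len)  # 10 11 0.909... -> above the 0.8 threshold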
@@ -59,7 +59,7 @@ EMBEDDING_SIM_THRESHOLD = 0.99
 
 def cosine_similarity(a, b):
     # 计算余弦相似度
-    dot = sum(x * y for x, y in zip(a, b))
+    dot = sum(x * y for x, y in zip(a, b, strict=False))
     norm_a = math.sqrt(sum(x * x for x in a))
     norm_b = math.sqrt(sum(x * x for x in b))
     if norm_a == 0 or norm_b == 0:
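The cosine_similarity helper in this hunk computes dot(a, b) / (|a| * |b|). A standalone copy with a worked value follows; the hunk is cut off after the zero-norm guard, so the 0.0 fallback is an assumption:

# Standalone sketch of the helper above; the zero-vector return value is assumed.
import math

def cosine_similarity(a, b):
    dot = sum(x * y for x, y in zip(a, b, strict=False))
    norm_a = math.sqrt(sum(x * x for x in a))
    norm_b = math.sqrt(sum(x * x for x in b))
    if norm_a == 0 or norm_b == 0:
        return 0.0  # assumption: treat zero vectors as dissimilar
    return dot / (norm_a * norm_b)

# dot = 1, |v1| = |v2| = sqrt(2), so the similarity is 1 / 2 = 0.5
print(cosine_similarity([1.0, 0.0, 1.0], [1.0, 1.0, 0.0]))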
@@ -285,7 +285,7 @@ class EmbeddingStore:
         distances = list(distances.flatten())
         result = [
             (self.idx2hash[str(int(idx))], float(sim))
-            for (idx, sim) in zip(indices, distances)
+            for (idx, sim) in zip(indices, distances, strict=False)
             if idx in range(len(self.idx2hash))
         ]
 
@@ -819,7 +819,7 @@ class EntorhinalCortex:
         timestamps = sample_scheduler.get_timestamp_array()
         # 使用 translate_timestamp_to_human_readable 并指定 mode="normal"
         readable_timestamps = [translate_timestamp_to_human_readable(ts, mode="normal") for ts in timestamps]
-        for _, readable_timestamp in zip(timestamps, readable_timestamps):
+        for _, readable_timestamp in zip(timestamps, readable_timestamps, strict=False):
             logger.debug(f"回忆往事: {readable_timestamp}")
         chat_samples = []
         for timestamp in timestamps:
@@ -299,7 +299,7 @@ class ActionModifier:
         task_results = await asyncio.gather(*tasks, return_exceptions=True)
 
         # 处理结果并更新缓存
-        for _, (action_name, result) in enumerate(zip(task_names, task_results)):
+        for _, (action_name, result) in enumerate(zip(task_names, task_results, strict=False)):
             if isinstance(result, Exception):
                 logger.error(f"{self.log_prefix}LLM判定action {action_name} 时出错: {result}")
                 results[action_name] = False
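The ActionModifier hunk relies on asyncio.gather(..., return_exceptions=True), which hands failed tasks back as exception objects instead of raising. A self-contained sketch of that pattern (task names and the error are invented for the example):

# Illustrative sketch of gather(..., return_exceptions=True); not project code.
import asyncio

async def judge(name: str) -> bool:
    if name == "broken_action":
        raise RuntimeError("LLM call failed")
    return True

async def main():
    names = ["reply_action", "broken_action"]
    results = await asyncio.gather(*(judge(n) for n in names), return_exceptions=True)
    for name, result in zip(names, results, strict=False):
        if isinstance(result, Exception):
            print(f"{name}: error -> {result}")
        else:
            print(f"{name}: {result}")

asyncio.run(main())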
@@ -974,7 +974,7 @@ def weighted_sample_no_replacement(items, weights, k) -> list:
     2. 不会重复选中同一个元素
     """
     selected = []
-    pool = list(zip(items, weights))
+    pool = list(zip(items, weights, strict=False))
     for _ in range(min(k, len(pool))):
         total = sum(w for _, w in pool)
         r = random.uniform(0, total)
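weighted_sample_no_replacement is truncated in this hunk right after r = random.uniform(0, total). A hedged sketch of how the roulette-wheel selection plausibly continues; everything past that line is an assumption, not part of the diff:

# Hedged sketch of the remainder of the sampling loop; only the lines up to
# random.uniform(0, total) appear in the hunk above.
import random

def weighted_sample_no_replacement(items, weights, k) -> list:
    selected = []
    pool = list(zip(items, weights, strict=False))
    for _ in range(min(k, len(pool))):
        total = sum(w for _, w in pool)
        r = random.uniform(0, total)
        upto = 0.0
        for i, (item, weight) in enumerate(pool):
            upto += weight
            if upto >= r:
                selected.append(item)
                pool.pop(i)  # removing the winner prevents repeat selection
                break
    return selected

print(weighted_sample_no_replacement(["a", "b", "c"], [0.7, 0.2, 0.1], 2))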
@@ -363,7 +363,7 @@ class ChineseTypoGenerator:
             else:
                 # 处理多字词的单字替换
                 word_result = []
-                for _, (char, py) in enumerate(zip(word, word_pinyin)):
+                for _, (char, py) in enumerate(zip(word, word_pinyin, strict=False)):
                     # 词中的字替换概率降低
                     word_error_rate = self.error_rate * (0.7 ** (len(word) - 1))
 
@@ -94,7 +94,7 @@ class ConfigBase:
                 raise TypeError(
                     f"Expected {len(field_type_args)} items for {field_type.__name__}, got {len(value)}"
                 )
-            return tuple(cls._convert_field(item, arg) for item, arg in zip(value, field_type_args))
+            return tuple(cls._convert_field(item, arg) for item, arg in zip(value, field_type_args, strict=False))
 
         if field_origin_type is dict:
             # 检查提供的value是否为dict
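The ConfigBase hunk converts a sequence into a typed tuple by pairing each item with the corresponding type argument of the declared Tuple[...] field. A simplified sketch of that idea, using plain constructors in place of the project's cls._convert_field (which is not shown in this diff):

# Simplified illustration of per-position tuple conversion.
from typing import Tuple, get_args

field_type = Tuple[int, str]
field_type_args = get_args(field_type)  # (int, str)
value = ["42", 7]

if len(value) != len(field_type_args):
    raise TypeError(f"Expected {len(field_type_args)} items, got {len(value)}")

converted = tuple(arg(item) for item, arg in zip(value, field_type_args, strict=False))
print(converted)  # (42, '7')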
@@ -247,7 +247,7 @@ def weighted_sample_no_replacement(items, weights, k) -> list:
     2. 不会重复选中同一个元素
     """
     selected = []
-    pool = list(zip(items, weights))
+    pool = list(zip(items, weights, strict=False))
     for _ in range(min(k, len(pool))):
         total = sum(w for _, w in pool)
         r = random.uniform(0, total)
@@ -54,7 +54,7 @@ class SearchKnowledgeTool(BaseTool):
     @staticmethod
     def _cosine_similarity(vec1: List[float], vec2: List[float]) -> float:
         """计算两个向量之间的余弦相似度"""
-        dot_product = sum(p * q for p, q in zip(vec1, vec2))
+        dot_product = sum(p * q for p, q in zip(vec1, vec2, strict=False))
         magnitude1 = math.sqrt(sum(p * p for p in vec1))
         magnitude2 = math.sqrt(sum(q * q for q in vec2))
         if magnitude1 == 0 or magnitude2 == 0: