Вроде стало похоже на правду

2023-11-20 23:01:54 +03:00 · 2023-11-20 23:01:54 +03:00 · 113176257f
parent d091cc4335
commit 113176257f
3 changed files with 111 additions and 29 deletions
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -5,11 +5,12 @@
        "version": "0.2.0",
        "configurations": [
                {
-                        "name": "Python: main8.py",
+                        "name": "Python: ищи-русские-имена-в-коде.py",
                        "type": "python",
                        "request": "launch",
-                        "program": "main8.py",
+                        "program": "ищи-русские-имена-в-коде.py",
                        "console": "integratedTerminal",
                        "cwd": "${workspaceFolder}/иривк",
                        "justMyCode": true
                }
        ]
--- a/иривк/repos.txt
+++ b/иривк/repos.txt
@@ -1,3 +1,70 @@
 https://github.com/budden/dlist
 https://github.com/DonCuponesInternet/rails_admin 
 https://github.com/aravindgd/rails_admin 
 https://github.com/athompson11/Soteria-v3 
 https://github.com/InsaneHyena/tgstation 
 https://github.com/Boggart/-tg-station 
 https://github.com/HippieStation/HippieStation13 
 https://github.com/ExcessiveUseOfCobblestone/tgstation 
 https://github.com/Judopay/Judo-Ruby 
 https://github.com/prgTW/monolog 
 https://github.com/bevis-ui/bevis-and-bt-speech 
 https://github.com/gdamjan/vezilka 
 https://github.com/LopatkinEvgeniy/robot-parser 
 https://github.com/kimshrier/elixir 
 https://github.com/Suomaa/FTT 
 https://github.com/dle-modules/DLE-Charset-Converter 
 https://github.com/Felix0830/gitextensions 
 https://github.com/thexide/JavaScript 
 https://github.com/istrel/basisjs 
 https://github.com/p2rv/Univer 
 https://github.com/ruLait/wp-steam-shortcode 
 https://github.com/Alexponomarev7/plotter 
 https://github.com/dosvid/landing 
 https://github.com/vlascoder/otrs 
 https://github.com/krf/kdevplatform 
 https://github.com/Nukkit/Nukkit 
 https://github.com/vsuh/1S_unloads 
 https://github.com/alshalan/Mobile-OpenVPN 
 https://github.com/LionZXY/HackathonBMSTU 
 https://github.com/splitice/Elastica 
 https://github.com/lolosoft/CashBook 
 https://github.com/fredformout/InstagramKit 
 https://github.com/nin-jin/pms-jin 
 https://github.com/mcepl/youtube-dl 
 https://github.com/FreeZbe/ACE3 
 https://github.com/nikolauska/ACE3 
 https://github.com/ddiachkov/chrno_audit 
 https://github.com/pershoot/vision-2635 
 https://github.com/byakatat/selenium-training 
 https://github.com/Flexberry/ember-flexberry-designer 
 https://github.com/otavioarc/freeCodeCamp 
 https://github.com/anketolog/AnketologClient-php 
 https://github.com/AKosterin/akosterin.github.io 
 https://github.com/fandrej/glonassd 
 https://github.com/Scorpibear/chegura 
 https://github.com/mentatDemon/TOPP_TC 
 https://github.com/joncol/jcon 
 https://github.com/kerneldevs/caf-kernel 
 https://github.com/nasser-embedded/linux 
 https://github.com/fmaker/kernel_msm 
 https://github.com/galaxys-cm7miui-kernel/ICS-kernel-SGS 
 https://github.com/coolya/android_kernel_samsung_msm 
 https://github.com/pacificIT/linux-2.6.36 
 https://github.com/DerTeufel/cm7 
 https://github.com/SergOmarov/Hight-level-library-for-Lua 
 https://github.com/EKOsh/TeleMonBot 
 https://github.com/expdevelop/d812 
 https://github.com/ms301/TelegraphAPI 
 https://github.com/johnner/tran 
 https://github.com/esclkm/pagemasseditor 
 https://github.com/EvercodeLab/EvercodeHipchatMonologBundle 
 https://github.com/JeffPyeBrook/WP-e-Commerce 
 https://github.com/ticketmaster-api/ticketmaster-api.github.io 
 https://github.com/dreikanter/boodka 
 https://github.com/daveloyall/urbit 
 https://github.com/LK4D4/criu 
 https://github.com/rbabichev/Astrafit 
 https://github.com/mishakos/InsuranceSystem.Library 
 https://github.com/vapkarian/soccer-analyzer 
 https://github.com/TrayEdge/FloatingActionButton 
--- a/иривк/ищи-русские-имена-в-коде.py
+++ b/иривк/ищи-русские-имена-в-коде.py
@@ -1,11 +1,12 @@
 import os
 import pathlib
 import re
 import requests
 from subprocess import call
 import threading
 import pygments
-from pygments.token import Text
+from pygments.lexers import get_lexer_for_filename
-from pygments import lex
+import pygments.token 
 from concurrent.futures import ThreadPoolExecutor
 ### Проверен на python 3.7.5
@@ -18,19 +19,23 @@ from concurrent.futures import ThreadPoolExecutor
 возможныеПутиК_README_mdВнутриРепозитория = ["/blob/main/README.md", "/blob/master/README.md", "/README.md", "/", ""]    
 найденныеЯзыкиКоторыеМыНеЗаказывали = []
-интересныеЯзыки = ['Ruby', 'VB.net', 'GLSL', 'Perl', 'PHP', 'Python', 'Common Lisp', 'OCaml', 'Java', 'C#', 'JavaScript', 'C', 'C++', 'Prolog', 'Go', 'Rust', 'Scheme', 'Transact-SQL', 'PL-SQL', 'tsql', 'PL/1', 'plsql', 'pli', 'Pascal', 'Delphi', 'Modula-2']
+интересныеЯзыки = ['Ruby', 'VB.net', 'GLSL', 'Perl', 'PHP', 'Python', 'Common Lisp', 'OCaml', 'Java', 
    'C#', 'JavaScript', 'C', 'C++', 'Prolog', 'Go', 'Rust', 'Scheme', 'Transact-SQL', 'PL-SQL', 'tsql', 'PL/1', 'plsql', 'pli', 'Pascal', 'Delphi', 'Modula-2']
 неинтересныеРасширенияФайлов = ['.md','.txt','.html','.xml','.XML','.json']
-def НайденЯзыкКоторыйМыНеЗаказывали(lexer_name, url, log):
+def НайденЯзыкКоторыйМыНеЗаказывали(lexer_name, url, log, файлДляНезаказанныхЯзыков):
    if lexer_name not in найденныеЯзыкиКоторыеМыНеЗаказывали:
        найденныеЯзыкиКоторыеМыНеЗаказывали.append(lexer_name)
-        log.write(f"{url} - Лексер определил язык, который не включеён в список разрешённых. {lexer.name}  \n")
+        log.write(f"{url} - Лексер определил язык, который не включён в список разрешённых. {lexer_name}  \n")
-        print(f"{url} - Лексер определил язык, который не включеён в список разрешённых. {lexer.name} ")
+        print(f"{url} - Лексер определил язык, который не включён в список разрешённых. {lexer_name} ")
        файлДляНезаказанныхЯзыков.write("%s\n" % lexer_name)
 def download_repo(url, log):
    httpsPrefix = "https://github.com/"
    assert(url.startswith(httpsPrefix))
-    repo_dir = "cloned_repos/" + url.split('/')[3:]
+    repo_dir = os.path.join("cloned_repos",*url.split('/')[3:])
    gitUrl = url.replace(httpsPrefix, "git@github.com:")
    try:
        call(['git', 'clone', '--depth', '1', gitUrl, repo_dir])
@@ -55,12 +60,11 @@ def analyze_readme(url, log):
                else:
                    return 1
        except Exception as e:
            log.write(f"{readme_url} - Не найден README.  \n")
            print(f"{readme_url} - Не найден README.")
    return 0
-def analyze_repo(url, log):
+def analyze_repo(url, log, файлДляНезаказанныхЯзыков):
    try:
            print(f"{url} STP загрузуа и анализ README")
            res = analyze_readme(url, log)
@@ -76,31 +80,42 @@ def analyze_repo(url, log):
                    for file in files:
                        file_path = os.path.join(root, file)
                        file_ext = os.path.splitext(file_path)[1]
                        неинтересноеРасширение = False
                        for расш in неинтересныеРасширенияФайлов:
                            if file_ext.endswith(расш):
                                неинтересноеРасширение = True
                                break
                        if неинтересноеРасширение:
                            continue
                        lexer = None
-                        try:
+                        if file_ext:
-                            lexer = pygments.lexers.get_lexer_for_filename(file_ext)
+                            try:
-                        except:
+                                lexer = get_lexer_for_filename(file_ext)
-                            log.write(f"{url}...{file_ext} - Лексер не определил язык. \n")
+                            except:
-                            print(f"{url}...{file_ext} - Лексер не определил язык. ")
+                                print(f"{url}...{file_ext} - Лексер не определил язык. ")
-                            pass
+                                lexer = None
-                        if lexer and not(lexer.name in интересныеЯзыки):
+                        if lexer is None:
-                            НайденЯзыкКоторыйМыНеЗаказывали(lexer.name, url, log)
+                            continue
-                        if file_ext and lexer and (lexer.name in интересныеЯзыки) and not file_ext.endswith(".md"):
+                        if not(lexer.name in интересныеЯзыки):
-                                #with open(file_path, 'r', encoding='utf-8') as f:
+                            НайденЯзыкКоторыйМыНеЗаказывали(lexer.name, url, log, файлДляНезаказанныхЯзыков)
                        if (lexer.name in интересныеЯзыки):
                                def ИщиРусскиеИменаВТакойКодировке(encoding):
                                    try:
                                        with open(file_path, 'r', encoding=encoding, errors = 'ignore') as f:
                                            content = f.read()
                                        if not re.search('[а-яА-ЯёЁ]',content):
                                            return False
-                                        with open(file_path, 'r') as f:
+                                        with open(file_path, 'r', encoding=encoding, errors = 'ignore') as f:
-                                            for token, value in lex(f.read(), lexer):
+                                            лексемы = pygments.lex(f.read(), lexer)
-                                                if token is pygments.token.Name:
+                                            for token, value in лексемы:
                                                # print(token)
                                                if pygments.token.is_token_subtype(token, pygments.token.Name):
                                                    if re.search('[а-яА-ЯёЁ]', value):
                                                        return True
                                        return False
                                    except:
                                        log.write(f"{url} - Ошибка при разборе файла. \n")
                                        print(f"{url} - Ошибка при разборе файла.")
                                        return False
@@ -112,6 +127,7 @@ def analyze_repo(url, log):
                print(f"{url} - Не обнаруженно файлов содержащих русские символы.")
            else:
                log.write(f"{url} - Русский язык был найден в этом репозитории: {files_with_russian}  \n")
                log.flush()
                print(f"{url} - Русский язык был найден в этом репозитории: {files_with_russian}")
                return
@@ -123,15 +139,13 @@ def analyze_repo(url, log):
 def main():
    # Чтение ссылок из файла
    with open("ЯзыкиКоторыеМыНеЗаказывали.txt", "w") as файлДляНезаказанныхЯзыков:
        with open("repos.txt", "r") as file:
            urls = file.readlines()
            urls = [url.strip() for url in urls]
        with open("log.txt", "w") as log:
            for url in urls:
-                analyze_repo(url,log)
+                analyze_repo(url,log,файлДляНезаказанныхЯзыков)
 main()
 print(f"Другие незамеченные языки: {найденныеЯзыкиКоторыеМыНеЗаказывали}")
 with open("NotFound.txt", "w") as log:
 	log.write(f'Другие незамеченные языки: {найденныеЯзыкиКоторыеМыНеЗаказывали} \n')